VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 67037

Last change on this file since 67037 was 67037, checked in by vboxsync, 8 years ago

IEM: Implemented vmovntdq Mx,Vx (VEX.66.0F e7 mod!=3).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 146.8 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 67037 2017-05-23 11:31:06Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 67037 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no builtin named 'long'; alias it to int so that code calling
# long() keeps working under both major versions.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS bit and mask values, keyed by their X86_EFL_XXX constant name.
## The trailing comment on each entry gives the equivalent RT_BIT_32() /
## UINT32_C() expression.  Used by TestTypeEflags to turn flag mnemonics
## into numeric masks.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to an X86_EFL_XXX constant name from
## g_kdX86EFlagsConstants.  A leading '!' on the constant name marks the
## mnemonic as meaning "this flag is clear"; TestTypeEflags.get() puts those
## into the clear mask rather than the set mask.
##
## NOTE(review): in the classic DEBUG flag notation 'pe' usually means PF=1
## and 'po' means PF=0, which is the reverse of the mapping below.  The table
## is internally consistent with TestTypeEflags.kdZeroValueFlags, so confirm
## against the users of these mnemonics before changing anything.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Keyed by X86_CR0_XXX name; the trailing comments give the RT_BIT_32()
## equivalent.  Passed to TestTypeFromDict for the 'cr0' test value type.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':          0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':          0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':          0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':          0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':          0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':          0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':          0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':          0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':          0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':          0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':          0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## Keyed by X86_CR4_XXX name; the trailing comments give the RT_BIT_32()
## equivalent.  Passed to TestTypeFromDict for the 'cr4' test value type.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':         0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':         0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':          0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':         0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':         0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':         0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':         0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':         0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':      0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':        0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':        0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':       0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':     0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':        0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':        0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':         0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## Keyed by XSAVE_C_XXX name.  Passed to TestTypeFromDict for the 'xcr0' test
## value type.  The two XSAVE_C_ALL_XXX entries are combined masks: 0xc4 is
## YMM | ZMM_HI256 | ZMM_16HI, and 0xc6 additionally includes SSE.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         0x00000001,
    'XSAVE_C_SSE':         0x00000002,
    'XSAVE_C_YMM':         0x00000004,
    'XSAVE_C_BNDREGS':     0x00000008,
    'XSAVE_C_BNDCSR':      0x00000010,
    'XSAVE_C_OPMASK':      0x00000020,
    'XSAVE_C_ZMM_HI256':   0x00000040,
    'XSAVE_C_ZMM_16HI':    0x00000080,
    'XSAVE_C_PKRU':        0x00000200,
    'XSAVE_C_LWP':         0x4000000000000000,
    'XSAVE_C_X':           0x8000000000000000,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
## The keys are the valid operand location names; they are what the second
## field of each g_kdOpTypes entry and the location lists in g_kdIemForms
## refer to.  The values are empty lists here (NOTE(review): whether they get
## filled in elsewhere in the file is not visible from this table).
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
200
## \@op[1-4] types
##
## Maps an operand type name to a 4-tuple.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Entries with an _RO / _WO / _RW suffix share the tuple of the unsuffixed
## type in most cases (the suffix is presumably the access mode: read-only,
## write-only, read-write).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      ),

    # VEX.vvvv
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register.
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
336
337# IDX_ParseFixedReg
338# IDX_ParseVexDest
339
340
## IEMFORM_XXX mappings.
## Maps a form name to a tuple of the encoding (a g_kdEncodings key) and the
## list of operand locations (g_kdOpLocations keys) in operand order.  The
## 'FIXED' form has None instead of a location list.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
374
## \@oppfx values.
## The keys are the accepted prefix names ('none' or the prefix byte as a
## lower case hex string); the values are empty lists in this table.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
382
## Special \@opcode tag values.
## These are the accepted non-hex \@opcode forms; the '11' / '!11' variants
## presumably restrict the form to modrm.mod == 3 / modrm.mod != 3, matching
## the selector descriptions in InstructionMap.kdSelectors.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
392
## Special \@opcodesub tag values.
## Maps each accepted spelling to a one-element list holding its canonical
## form, so the short aliases ('11', '!11', 'w=0', 'w=1') resolve to the same
## value as the long spelling.  'none' maps to None.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
    'rex.w=0':      [ 'rex.w=0',    ],
    'w=0':          [ 'rex.w=0',    ], ##< alias
    'rex.w=1':      [ 'rex.w=1',    ],
    'w=1':          [ 'rex.w=1',    ], ##< alias
};
405
## Valid values for \@openc
## Maps the encoding name to a one-element list holding the matching
## BS3CG1ENC_XXX constant name; 'prefix' has no such constant (None).
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
414
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys name how much of an invalid/unused encoding gets decoded before
## the CPU gives up; the values are empty lists in this table.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
};
424
## Known CPU names.
## Only the keys carry information in this table; every value is an empty
## tuple.  NOTE(review): which tag validates against this is not visible
## here - presumably a minimum-CPU tag.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
432
## \@opcpuid
## Maps the lower case CPUID feature name accepted by \@opcpuid to the name
## of the matching X86_CPUID_XXX feature bit constant.
##
## Each name maps to the constant of its own feature bit.  Previously
## 'pclmul', 'monitor' and 'smx' pointed at the constants of neighbouring
## CPUID.01h:ECX bits (DTES64, CPLDS and TM2 respectively).
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
477
## \@ophints values.
## Maps the hint name to the corresponding DISOPTYPE_XXX constant name.  The
## hints at the end map to an empty string, i.e. they have no DISOPTYPE_XXX
## constant associated with them in this table.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ## (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':        '',                                ##< Ignores operand size prefix 66h.
    'ignores_rexw':          '',                                ##< Ignores REX.W.
    'ignores_op_sizes':      '',                                ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (presumably; the
                                                                ##  previous wording referred to ignores_op_sizes itself).
    'ignores_vex_l':         '',                                ##< Ignores VEX.L.
    'vex_l_zero':            '',                                ##< VEX.L must be 0.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
516
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted exception type names; the values are empty
## lists in this table.  An 'LZ' suffix means VEX.L must be zero; the
## 'E'-prefixed names and their 'NF' variants are presumably the EVEX
## exception classes from the same SDM sections - confirm there.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '5LZ':      [], # LZ = VEX.L must be zero.
    '6':        [],
    '7':        [],
    '7LZ':      [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
552
553
def _isValidOpcodeByte(sOpcode):
    """
    Tests whether sOpcode spells an opcode byte as '0x' followed by exactly
    two lower case hexadecimal digits (e.g. '0x0f').

    Returns True if it does, False for anything else (wrong length, missing
    '0x' prefix, upper case or non-hex digits).
    """
    # Guard clauses first: exactly four characters, starting with '0x'.
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;
    # Both remaining characters must be lower case hex digits.
    return all(chDigit in '0123456789abcdef' for chDigit in sOpcode[2:]);
565
566
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): the comments say 'vvvvv'; the five-bit field meant here is
    ## presumably the VEX/XOP map-select field rather than VEX.vvvv - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        sName is the map name, asLeadOpcodes the list of lead opcode bytes as
        '0xNN' strings (None means no lead bytes), sSelector a kdSelectors
        key, sEncoding a kdEncodings key and sDisParse an optional
        'IDX_Parse...' name.  The arguments are sanity checked with asserts.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               ##< The instructions in this map (Instruction objects).
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        oInstr must provide getOpcodeByte() and, for the 'byte' selector, an
        sOpcode attribute of the '0xNN' form.  The index is derived from the
        opcode byte by treating it as a ModR/M byte for all selectors except
        'byte' (mod = bits 7:6, reg = bits 5:3, rm = bits 2:0).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # The mod field is bits 7:6, so mod == 0y11 means the masked value is
        # 0xc0.  (Comparing with 0xc can never match a value masked by 0xc0.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.

        Instructions with an empty/None sOpcode are left out.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: switch the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        sName: 'm', 'r' and two-digit lower case hex words are appended with
        their underscore, any other word gets 'grp'/'map' capitalized, its
        first character upper-cased and is appended without a separator.
        Empty words (e.g. from an empty name) contribute nothing.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                # Slicing rather than indexing so an empty word cannot raise IndexError.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;


    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
712
713
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName is the type name, acbSizes the list of natural value sizes in
        bytes (defaults to 1, 2, 4, 8, 16 and 32) and fUnsigned whether values
        without an explicit sign are zero extended rather than sign extended.
        """
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally preceded by a '+' or '-' sign.  The bytes are returned
        least significant byte first.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() handles arbitrarily large values
        # on both Python 2 and 3, so no 'long' is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a bare lower case hex digit string.  '%x' yields neither a
        # '0x' prefix nor a Python 2 'L' suffix.  Negative values are converted
        # to two's complement digits by inverting the digits of (-n - 1).
        # NOTE(review): a '+' prefixed value with the top bit set (e.g. '+0x80')
        # is emitted as-is with fSignExtend=True; confirm that consumers do not
        # read that as a negative number.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;  # Make sure the top bit is set.

        # Find the natural size: the first listed size that fits, or else the
        # digit count rounded up to a multiple of the largest listed size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad up to the natural size, using 'f' digits for negative values.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            sHex = (('0' if iValue >= 0 else 'f') * cNeeded) + sHex;

        # Walk the digit pairs from the end of the string so that the least
        # significant byte comes first in the bytearray.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
838
839
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of the mnemonics in g_kdEFlagsMnemonics.
    """

    ## The mnemonics that mean "flag is clear" (the ones g_kdEFlagsMnemonics
    ## maps to a '!' prefixed constant).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated flag mnemonic list into byte form.

        Returns the TestType.get() result for the mask of flags to set when no
        "clear" mnemonic is present, otherwise a pair where the first entry is
        built from the mask of flags to clear and the second from the mask of
        flags to set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains at least one "flag is clear" mnemonic, in
        which case get() returns the pair form.

        Whole comma separated mnemonics are compared.  A plain substring search
        would wrongly report 'iopl' as a pair because it contains 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
875
class TestTypeFromDict(TestType):
    """
    Value parsing for flag registers described by a constant dictionary.

    The value is a comma separated list of flag names.  Each name is upper
    cased, prefixed with sConstantPrefix and looked up in kdConstantsAndValues;
    the values found are OR'ed together.  (Instantiated for CR0, CR4 and XCR0
    by TestInOut.kdTypes.)
    """

    ## Not referenced by this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Returns the TestType.get() byte form of the combined flag value.
        Raises BadValue if a flag name is not in the dictionary.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sKey  = self.sConstantPrefix + sName.upper();
            fBits = self.kdConstantsAndValues.get(sKey);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined = fCombined | fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
896
897
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance ties a field name (key in kdFields) to an operator (entry in
    kasOperators), a value string and a type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ]
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    }
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Misspelled form of 'xmm15.dw0' (underscore instead of dot); kept so
        # that anything already using the old spelling continues to work.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key of kdFields and sOp an entry of kasOperators.
        All four arguments are expected to be strings; they are stored as is.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
        assert isinstance(sField, str)
        assert isinstance(sOp, str)
        assert isinstance(sType, str)
        assert isinstance(sValue, str)
1163
1164
class TestSelector(object):
    """
    A selector of an instruction test: a variable, a compare operator and the
    value the variable is compared with.
    """

    ## The compare operators a selector can use.
    kasCompareOps = [ '==', '!=' ]

    ## The selector variables, each with a dictionary of its valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    }

    ## Shorthand predicates, each mapped to the variable expression it
    ## stands for.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bit count.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue a valid value for the variable.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        assert sValue in self.kdVariables[sVariable]
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue
1261
1262
1263class InstructionTest(object):
1264 """
1265 Instruction test.
1266 """
1267
1268 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1269 self.oInstr = oInstr; # type: InstructionTest
1270 self.aoInputs = []; # type: list(TestInOut)
1271 self.aoOutputs = []; # type: list(TestInOut)
1272 self.aoSelectors = []; # type: list(TestSelector)
1273
1274 def toString(self, fRepr = False):
1275 """
1276 Converts it to string representation.
1277 """
1278 asWords = [];
1279 if self.aoSelectors:
1280 for oSelector in self.aoSelectors:
1281 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1282 asWords.append('/');
1283
1284 for oModifier in self.aoInputs:
1285 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1286
1287 asWords.append('->');
1288
1289 for oModifier in self.aoOutputs:
1290 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1291
1292 if fRepr:
1293 return '<' + ' '.join(asWords) + '>';
1294 return ' '.join(asWords);
1295
1296 def __str__(self):
1297 """ Provide string represenation. """
1298 return self.toString(False);
1299
1300 def __repr__(self):
1301 """ Provide unambigious string representation. """
1302 return self.toString(True);
1303
class Operand(object):
    """
    An operand of an instruction, described by where it is found and by its
    type.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known to the respective global tables.
        assert sWhere in g_kdOpLocations, sWhere
        assert sType in g_kdOpTypes, sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """
        Returns True if using some form of ModR/M encoding.

        This is decided from the first character of the operand type alone,
        which must be 'E', 'G' or 'M' for the answer to be True.
        """
        chFirst = self.sType[0]
        return chFirst == 'E' or chFirst == 'G' or chFirst == 'M'
1318
1319
1320
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about one instruction: the core attributes, the
    implementation attributes, where it was found in the sources and some
    intermediate raw input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        self.asCopyTests    = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs.  Fields
        whose value is None are left out, and several fields are only listed
        when set (non-empty, True, or different from -1 for the line numbers).
        The list is wrapped in '<' and '>' when fRepr is True.
        """
        aasFields = []

        aasFields.append(['opcode',    self.sOpcode])
        aasFields.append(['mnemonic',  self.sMnemonic])
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)])
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])])
        aasFields.append(['encoding',  self.sEncoding])
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())])
        aasFields.append(['disenum',   self.sDisEnum])
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)])
        aasFields.append(['group',     self.sGroup])
        if self.fUnused:        aasFields.append(['unused', 'True'])
        if self.fInvalid:       aasFields.append(['invalid', 'True'])
        aasFields.append(['invlstyle', self.sInvalidStyle])
        aasFields.append(['fltest',    self.asFlTest])
        aasFields.append(['flmodify',  self.asFlModify])
        aasFields.append(['flundef',   self.asFlUndefined])
        aasFields.append(['flset',     self.asFlSet])
        aasFields.append(['flclear',   self.asFlClear])
        aasFields.append(['mincpu',    self.sMinCpu])
        aasFields.append(['stats',     self.sStats])
        aasFields.append(['sFunction', self.sFunction])
        if self.fStub:          aasFields.append(['fStub', 'True'])
        if self.fUdStub:        aasFields.append(['fUdStub', 'True'])
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)])
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)])
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)])

        # Only fields with a value make it into the string.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None])
        if fRepr:
            return '<' + sRet + '>'
        return sRet

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Understood forms (r being a single digit in the range 0 thru 7):
            - 0x??:   Full hex byte, returned as is.
            - /r:     Returns r << 3.
            - 11/r:   Returns (r << 3) | 0xc0.
            - !11/r:  Returns (r << 3) | 0x80, i.e. mod=1.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sOpcode = str(self.sOpcode)     # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16)

        # The /r, 11/r and !11/r forms.  Each consists of the prefix and one
        # digit, so the expected length is that of the prefix plus one (the
        # plain /r form is thus two characters long).
        ## @todo the !11/r form doesn't really work (it just returns mod=1)...
        for sPrefix, bModBits in (('/', 0x00), ('11/', 0xc0), ('!11/', 0x80)):
            if     len(sOpcode) == len(sPrefix) + 1 \
               and sOpcode.startswith(sPrefix) \
               and sOpcode[-1] in '01234567':
                return (int(sOpcode[-1]) << 3) | bModBits

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics and
        g_kdX86EFlagsConstants.  Negated ('!' prefixed) constants are not
        expected here.  None and empty lists give zero.
        """
        uRet = 0
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag]
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant]
        return uRet

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear)

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False
        return all(oMap.isVexMap() for oMap in self.aoMaps)
1503
1504
1505
## List of all the instructions.
g_aoAllInstructions = list()            # type: list(Instruction)

## Dictionary of all the instructions, keyed by statistics name (opstat).
g_dAllInstructionsByStat = dict()       # type: dict(Instruction)

## Dictionary of all the instructions, keyed by function name (opfunction).
g_dAllInstructionsByFunction = dict()   # type: dict(list(Instruction))

## List of the instructions tagged by oponlytest.
g_aoOnlyTestInstructions = list()       # type: list(Instruction)
1517
## Instruction maps.
## Keyed by map name; each entry is an InstructionMap created with the same
## name, plus the lead opcodes, selector, encoding and disassembler parser
## where these are given.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Note: grp1_80 is given the same '/r' selector as the other three group 1 maps.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
}
1578
1579
1580
class ParserException(Exception):
    """ Parser exception; carries the message it is constructed with. """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
1585
1586
1587class SimpleParser(object):
1588 """
1589 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1590 """
1591
1592 ## @name Parser state.
1593 ## @{
1594 kiCode = 0;
1595 kiCommentMulti = 1;
1596 ## @}
1597
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state.

    sSrcFile is the source file name (used in error messages), asLines is
    stored as the input lines, and sDefaultMap must be a key of
    g_dInstructionMaps selecting the default instruction map.
    """
    # The input.
    self.sSrcFile       = sSrcFile
    self.asLines        = asLines

    # Current position and state; starts out in the code state.
    self.iLine          = 0
    self.iState         = self.kiCode
    self.sComment       = ''
    self.iCommentLine   = 0
    self.aoCurInstrs    = []

    # The default map must be a known one.
    assert sDefaultMap in g_dInstructionMaps
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap]

    # Counters.
    self.cTotalInstr = self.cTotalStubs = self.cTotalTagged = 0

    # Regular expressions for checking the various kinds of names.
    self.oReMacroName    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReMnemonic     = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReStatsName    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReFunctionName = re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$')
    self.oReGroupName    = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$')
    self.oReDisEnum      = re.compile('^OP_[A-Z0-9_]+$')
    self.fDebug          = True

    # The tag handler table, built from (handler, tags) groups.
    dHandlers = {}
    for fnHandler, asTags in (
        (self.parseTagOpBrief,          ('@opbrief',)),
        (self.parseTagOpDesc,           ('@opdesc',)),
        (self.parseTagOpMnemonic,       ('@opmnemonic',)),
        (self.parseTagOpOperandN,       ('@op1', '@op2', '@op3', '@op4',)),
        (self.parseTagOpPfx,            ('@oppfx',)),
        (self.parseTagOpMaps,           ('@opmaps',)),
        (self.parseTagOpcode,           ('@opcode',)),
        (self.parseTagOpcodeSub,        ('@opcodesub',)),
        (self.parseTagOpEnc,            ('@openc',)),
        (self.parseTagOpEFlags,         ('@opfltest', '@opflmodify', '@opflundef', '@opflset', '@opflclear',)),
        (self.parseTagOpHints,          ('@ophints',)),
        (self.parseTagOpDisEnum,        ('@opdisenum',)),
        (self.parseTagOpMinCpu,         ('@opmincpu',)),
        (self.parseTagOpCpuId,          ('@opcpuid',)),
        (self.parseTagOpGroup,          ('@opgroup',)),
        (self.parseTagOpUnusedInvalid,  ('@opunused', '@opinvalid', '@opinvlstyle',)),
        (self.parseTagOpTest,           ('@optest',)),
        (self.parseTagOpTestIgnore,     ('@optestign', '@optestignore',)),
        (self.parseTagOpCopyTests,      ('@opcopytests',)),
        (self.parseTagOpOnlyTest,       ('@oponly', '@oponlytest',)),
        (self.parseTagOpXcptType,       ('@opxcpttype',)),
        (self.parseTagOpStats,          ('@opstats',)),
        (self.parseTagOpFunction,       ('@opfunction',)),
        (self.parseTagOpDone,           ('@opdone',)),
    ):
        for sTag in asTags:
            dHandlers[sTag] = fnHandler

    # The numbered test tags, in both the plain and the bracketed spelling.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            dHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum
    self.dTagHandlers = dHandlers

    self.asErrors = []
1664
def raiseError(self, sMessage):
    """
    Raises a ParserException for sMessage, with the source file name and the
    current line number (self.iLine) put in front of it.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)
    raise ParserException(sFullMessage)
1670
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the line number reported is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,))
1676
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
1684
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside a comment in self.asErrors, the line
    number being self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,))
    return False
1692
def printErrors(self):
    """
    Writes the collected errors (self.asErrors), if any, to stderr.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors)
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors))
    return cErrors
1701
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix, provided self.fDebug is set.
    """
    if not self.fDebug:
        return
    print('debug: %s' % (sMessage,))
1708
1709
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for the current source file and adds it to
    both g_aoAllInstructions and self.aoCurInstrs.

    The instruction gets iLine as its line number, or self.iLine when iLine
    is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine
    oNewInstr = Instruction(self.sSrcFile, iLine)
    for aoList in (g_aoAllInstructions, self.aoCurInstrs):
        aoList.append(oNewInstr)
    return oNewInstr
1718
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done if the instruction already has a mnemonic.  Otherwise
    sStats is split on underscores: the first word, lower cased, becomes the
    mnemonic and the remaining words are taken as operand types, provided the
    instruction has no operands yet.

    Returns False if one of the words is not in g_kdOpTypes (operands added
    before hitting it are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True

    asParts = sStats.split('_')
    oInstr.sMnemonic = asParts[0].lower()

    asOperandTypes = asParts[1:]
    if asOperandTypes and not oInstr.aoOperands:
        for sType in asOperandTypes:
            if sType not in g_kdOpTypes:
                # Unknown operand type; no error is recorded for it.
                return False
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType))
    return True
1734
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by deriving missing
    attributes from the ones that were given and registering it.

    Steps, in order:
        - Record iLine as the completion line (must not be set already).
        - Guess mnemonic/operands from the stats or function name.
        - Derive the disassembler enum, stats name and function name.
        - Apply the default map and add the instruction to all its maps.
        - Derive the encoding from operands and maps.
        - Register in g_dAllInstructionsByStat (duplicates are reported via
          self.error) and g_dAllInstructionsByFunction.

    Always returns True.
    """
    assert oInstr.iLineCompleted is None
    oInstr.iLineCompleted = iLine

    # Note: There is currently no processing specific to tagged (cOpTags > 0)
    #       or unspecified legacy instructions; both only get the common
    #       defaults below.

    sFnPrefix = 'iemOp_'

    # Guess mnemonic and operands from stats (or function) if the former is missing.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace(sFnPrefix, '')
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom)

    # Disassembler op enum constant: OP_<MNEMONIC>.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper()

    # IEM statistics base name: function name sans prefix, else mnemonic + operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace(sFnPrefix, '')
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOp.sType for oOp in oInstr.aoOperands if oOp.sType])

    # IEM function name: prefix + mnemonic + operand types, else prefix + stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = sFnPrefix + oInstr.sMnemonic \
                             + ''.join(['_' + oOp.sType for oOp in oInstr.aoOperands if oOp.sType])
        elif oInstr.sStats:
            oInstr.sFunction = sFnPrefix + oInstr.sStats

    # Apply the default map and then add the instruction to all its maps.
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ]
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr)

    # Derive encoding from operands and maps.
    if oInstr.sEncoding is None:
        aoOps = oInstr.aoOperands
        if not aoOps:
            sKind = 'ModR/M' if (oInstr.fUnused and oInstr.sSubOpcode) else 'fixed'
            oInstr.sEncoding = ('VEX.' + sKind) if oInstr.onlyInVexMaps() else sKind
        elif aoOps[0].usesModRM():
            fVex = (len(aoOps) >= 2 and aoOps[1].sWhere == 'vvvv') or oInstr.onlyInVexMaps()
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M'

    # Check the opstat value and add it to the opstat indexed dictionary.
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],))
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr

    # Add to function indexed dictionary.  Multiple instructions per function are fine.
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr)

    return True
1840
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the parser state.

    Each current instruction is passed to doneInstructionOne together with
    the completion line: self.iLine when iLineInComment is None, otherwise
    self.iCommentLine + iLineInComment.  Stub and instruction totals are
    updated, then the comment buffer and current instruction list are
    cleared.

    Always returns True.
    """
    for oCur in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine
        else:
            iDoneLine = self.iCommentLine + iLineInComment
        self.doneInstructionOne(oCur, iDoneLine)
        if oCur.fStub:
            self.cTotalStubs += 1

    self.cTotalInstr += len(self.aoCurInstrs)

    self.aoCurInstrs = []
    self.sComment    = ''
    return True
1855
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all the current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced (other values, including empty strings, are left alone).
    """
    for oCur in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCur, sAttrib) is None:
            setattr(oCur, sAttrib, oValue)
1867
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue.

    The array is padded with None entries as needed to make iEntry valid.
    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oCur in self.aoCurInstrs:
        aoEntries = getattr(oCur, sAttrib)
        cMissing = iEntry + 1 - len(aoEntries)
        if cMissing > 0:
            aoEntries.extend([None] * cMissing)
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue
1879
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line starts with the word 'Opcode' followed by a word
    with a '0x' or '0X' prefix, the words following 'Opcode' are stored
    (space separated, with any '0X' prefix normalized to '0x') as the
    sRawOldOpcodes attribute of the current instructions, provided it
    isn't already set.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else []
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the 'Opcode' word and lower case upper case hex prefixes.
        asOpcodeWords = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord
                         for sWord in asWords[1:]]
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords))

    return False
1895
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added at comment line
    self.iCommentLine + iTagLine.  The tag count of every current
    instruction is incremented, and self.cTotalTagged is bumped for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine)

    for oCur in self.aoCurInstrs:
        cTags = oCur.cOpTags + 1
        oCur.cOpTags = cTags
        if cTags == 1:
            self.cTotalTagged += 1

    return self.aoCurInstrs[-1]
1905
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into single strings.

    The lines of each section are stripped and joined with a single space.
    Sections without any lines are skipped.

    Returns list of strings, one string per (non-empty) section.
    """
    return [' '.join([sLine.strip() for sLine in asLines])
            for asLines in aasSections
            if asLines]
1917
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    r"""
    Flattens sections into a single string.

    The lines of each section are stripped and joined using sLineSep, then
    the sections are joined using sSectionSep (newline by default).
    Sections without any lines are skipped and never contribute a
    separator, so the result neither starts nor ends with sSectionSep
    because of an empty section.

    Returns the flattened string (empty if there is nothing to flatten).
    """
    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections
                             if asLines])
1935
1936
1937
1938 ## @name Tag parsers
1939 ## @{
1940
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from \@opdesc.

    The value is flattened, terminated with a dot if it lacks one, and
    must be a single sentence of no more than 180 characters.  It is then
    stored as the sBrief attribute of the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections)
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if sBrief[-1] != '.':
        sBrief = sBrief + '.'
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief))

    # Locate the first dot that ends a sentence, i.e. one followed by a
    # space or sitting at the very end.  It must be the final character.
    offDot = sBrief.find('.')
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1)
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief))

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,))
    oInstr.sBrief = sBrief

    _ = iEndLine
    return True
1970
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each section is flattened into
    one string and appended to the asDescSections list of the instruction.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)
    if aasSections:
        asFlattened = self.flattenSections(aasSections)
        oInstr.asDescSections.extend(asFlattened)

    _ = (sTag, iEndLine)
    return True
1985
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    An already set mnemonic is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten the value and check that it is well formed.
    sNewMnemonic = self.flattenAllSections(aasSections)
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,))

    # Refuse to replace an existing one.
    sOldMnemonic = oInstr.sMnemonic
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,))
    oInstr.sMnemonic = sNewMnemonic

    _ = iEndLine
    return True
2008
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a g_kdOpLocations key.  When
    omitted, the default location of the type is used (second entry of
    the g_kdOpTypes value).

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be
    a g_kdOpTypes key.

    The operand index is taken from the last character of the tag.  An
    already specified operand is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)
    idxOp = int(sTag[-1]) - 1
    assert idxOp >= 0 and idxOp < 4

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections)
    asSplit = sFlattened.split(':')
    if len(asSplit) == 1:
        sType  = asSplit[0]
        sWhere = None
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,))

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),))
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1]
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),))

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None)
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,))
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType)

    _ = iEndLine
    return True
2058
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The value is a comma separated list of g_dInstructionMaps keys.
    Whitespace around the names and empty list entries (like the one a
    trailing comma at the end of a line produces) are ignored.  The maps
    are appended to the aoMaps list of the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',')
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')]
    asMaps = [sMap for sMap in asMaps if sMap]
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),))

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName))

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap]
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap)
        else:
            # Same map given twice in the value; complain but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap))

    _ = iEndLine
    return True
2093
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower cased.  'n/a' is stored as None, 'none' is stored
    as is, and anything else must be a valid opcode byte (a two character
    value gets '0x' prepended first).  Values other than 'n/a' must also
    be g_kdPrefixes keys.  An already set prefix is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections)
    asPrefixes = sFlattened.split()
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,))

    sPrefix = asPrefixes[0].lower()
    if sPrefix == 'n/a':
        sPrefix = None
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,))

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,))

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,))
    oInstr.sPrefix = sPrefix

    _ = iEndLine
    return True
2131
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' part is a single digit in the range 0 thru 7, as the ModR/M
    reg field is three bits wide.  An already set opcode is never
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections)
    sRegDigits = '01234567'
    if _isValidOpcodeByte(sOpcode):
        pass
    elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in sRegDigits:
        pass
    elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in sRegDigits:
        pass
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
        pass
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,))

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,))
    oInstr.sOpcode = sOpcode

    _ = iEndLine
    return True
2161
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The value must be a g_kdSubOpcodes key; what gets stored in the
    sSubOpcode attribute is the first entry of the table value.  An already
    set sub opcode is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten the value and translate it via the table.
    sRawValue = self.flattenAllSections(aasSections)
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,))
    sTranslated = g_kdSubOpcodes[sRawValue][0]

    # Store it unless there is one already.
    sExisting = oInstr.sSubOpcode
    if sExisting is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sExisting, sTranslated,))
    oInstr.sSubOpcode = sTranslated

    _ = iEndLine
    return True
2187
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a g_kdEncodings key, a
    g_dInstructionMaps key, or a valid opcode byte.  An already set
    encoding is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections)
    fAcceptable = sNewEncoding in g_kdEncodings \
               or sNewEncoding in g_dInstructionMaps \
               or _isValidOpcodeByte(sNewEncoding)
    if not fAcceptable:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,))

    # Store it unless there is one already.
    sOldEncoding = oInstr.sEncoding
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,))
    oInstr.sEncoding = sNewEncoding

    _ = iEndLine
    return True
2214
## Maps each EFLAGS tag to the name of the Instruction attribute it sets
## (see parseTagOpEFlags).
kdOpFlagToAttr = dict([
    ( '@opfltest',   'asFlTest' ),
    ( '@opflmodify', 'asFlModify' ),
    ( '@opflundef',  'asFlUndefined' ),
    ( '@opflset',    'asFlSet' ),
    ( '@opflclear',  'asFlClear' ),
])
2223
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or
    the single word 'none' for an empty list.  Flags that only match after
    stripping whitespace are stored in their stripped form.

    The list is stored in the instruction attribute self.kdOpFlagToAttr
    gives for the tag.  An already set attribute is never overwritten.

    Returns True on success, False if any of the flags are invalid, and
    the result of self.errorComment() on an overwrite attempt.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split up and validate the values.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',')
    asFlags = sJoined.split(',')
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = []
    else:
        fOkay = True
        for iFlag, sRaw in enumerate(asFlags):
            if sRaw in g_kdEFlagsMnemonics:
                continue
            sTrimmed = sRaw.strip()
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sTrimmed
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRaw,))
        if not fOkay:
            return False

    # Set them.
    sAttrib = self.kdOpFlagToAttr[sTag]
    asOld = getattr(oInstr, sAttrib)
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,))
    setattr(oInstr, sAttrib, asFlags)

    _ = iEndLine
    return True
2255
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a g_kdHints key.  The single word 'none' gives an
    empty list.  Valid hints are added to the dHints dictionary of the
    instruction; duplicates are reported but otherwise ignored.

    Returns True on success and False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never yields words with leading or
    #       trailing whitespace, so there is no need to strip them again.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = []
    else:
        fRc = True
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,))
        if not fRc:
            return False

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,))

    _ = iEndLine
    return True
2289
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The value must match self.oReDisEnum.  If more than one word is given,
    an error is reported and the first word is used.  An already set enum
    value is never overwritten.

    Returns True on success, False if no value was given, otherwise the
    result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split()
    cWords = len(asWords)
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,))
        if cWords == 0:
            return False

    sNewEnum = asWords[0]
    if self.oReDisEnum.match(sNewEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewEnum, self.oReDisEnum.pattern))

    # Store it unless there is one already.
    sOldEnum = oInstr.sDisEnum
    if sOldEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldEnum, sNewEnum,))
    oInstr.sDisEnum = sNewEnum

    _ = iEndLine
    return True
2318
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a g_kdCpuNames key.  If more than one word is given,
    an error is reported and the first word is used.  An attempt to replace
    an already set minimum CPU with a different one is reported as an
    error and the existing value is kept.

    Returns True if the value is valid, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split()
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),))

    sMinCpu = asCpus[0]
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),))

    # Set it.  (Must not be done before this point, or the overwrite
    # check below would be comparing the new value with itself.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,))

    _ = iEndLine
    return True
2348
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys,
    or the single word 'none' for an empty list.  Valid flags are appended
    to the asCpuIds list of the instruction; duplicates are reported but
    otherwise ignored.

    Returns True on success and False if any of the flags are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never yields words with leading or
    #       trailing whitespace, so there is no need to strip them again.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = []
    else:
        fRc = True
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,))
        if not fRc:
            return False

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId)
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,))

    _ = iEndLine
    return True
2382
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is required.  An
    already set group is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split into words and validate: one well formed group name.
    asGroups = self.flattenAllSections(aasSections).split()
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,))
    (sNewGroup,) = asGroups
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern))

    # Store it unless there is one already.
    sOldGroup = oInstr.sGroup
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,))
    oInstr.sGroup = sNewGroup

    _ = iEndLine
    return True
2408
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one g_kdInvalidStyles key is required.  It is stored as the
    sInvalidStyle attribute, which is never overwritten once set.  The
    \@opunused and \@opinvalid tags additionally set fUnused and fInvalid
    respectively.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split into words and validate: one known style.
    asStyles = self.flattenAllSections(aasSections).split()
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,))
    (sNewStyle,) = asStyles
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),))

    # Store it unless one of the three tags has been given already.
    sOldStyle = oInstr.sInvalidStyle
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,))
    oInstr.sInvalidStyle = sNewStyle

    # Tag specific flag.
    if sTag == '@opunused':
        oInstr.fUnused = True
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True

    _ = iEndLine
    return True
2446
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  The words of a section are
    sorted into selectors, inputs and outputs using the '/' and '->'
    separator words, each of which may only occur once and in that order.
    Tests that parse without errors are appended to the aoTests list of
    the instruction, the others are reported via self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,])
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections))
            continue
        oTest = InstructionTest(oInstr)

        asSelectors = []
        asInputs    = []
        asOutputs   = []
        asCur       = asOutputs
        fRc         = True
        asWords     = sFlatSection.split()
        # Walk the words back to front: outputs come last, selectors first.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord]
            # Check for array switchers.  The checks are done by identity
            # since the lists may well have equal content (e.g. all empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asInputs
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asSelectors
            else:
                asCur.insert(0, sWord)

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond)
            oSelector = None
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp)
                if off >= 0:
                    sVariable = sCondExp[:off]
                    sValue    = sCondExp[off + len(sOp):]
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue)
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),))
                    # The first operator found in the condition decides.
                    break
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,))
                oTest.aoSelectors.append(oSelector)
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,))

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ]
            for sItem in asItems:
                oItem = None
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp)
                    if off < 0:
                        continue
                    sField     = sItem[:off]
                    sValueType = sItem[off + len(sOp):]
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        asSplit = sValueType.split(':', 1)
                        sValue  = asSplit[0]
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0]
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue)
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType)
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ))
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ))
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),))
                    # The first operator found in the item decides.
                    break
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,))
                    aoDst.append(oItem)
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,))

        #
        # Add the test if it parsed fine, otherwise complain.
        #
        if fRc:
            oInstr.aoTests.append(oTest)
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),))
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,))

    _ = iEndLine
    return True
2591
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the number of tests the instruction
    already has, reporting an error if they differ.  The value is then
    handed to parseTagOpTest regardless.

    Returns what parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Extract the number following the 7 character '@optest' part.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:]
    iTest   = int(sNumber)

    cTests = len(oInstr.aoTests)
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,))
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine)
2607
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.

    See also \@oponlytest.

    Does nothing with its arguments and always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine)
    return True
2619
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid references are appended to the asCopyTests list of the
    instruction; this method only records them.  Invalid and duplicate
    references are reported and skipped.

    Returns True if at least one reference was given, otherwise the result
    of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and split the value into references.
    asReferences = self.flattenAllSections(aasSections).split()
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,))

    # Validate each reference and queue the good ones.
    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,))
            continue
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference)
            continue
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern))

    _ = iEndLine
    return True
2648
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True after registering the instruction in the module level
    g_aoOnlyTestInstructions list, or the errorComment() result if the tag
    was given a value.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value; anything left after flattening is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if not sUnexpected:
        # Register the instruction once only.
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));
2671
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is required, it must be a key of g_kdXcptTypes, and it
    may only be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Set it, refusing to overwrite an earlier value.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
2698
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or it is
    generated from the mnemonic and operands.

    Setting it by hand is thought to be useful when specifying instructions
    whose implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the function name pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName):
        # The name may only be set once.
        if oInstr.sFunction is None:
            oInstr.sFunction = sName;
            _ = iEndLine;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, oInstr.sFunction, sName,));

    return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                       % (sTag, sName, self.oReFunctionName.pattern));
2726
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    Setting it by hand is thought to be useful when specifying instructions
    whose implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the statistics name pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName):
        # The name may only be set once.
        if oInstr.sStats is None:
            oInstr.sStats = sName;
            _ = iEndLine;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, oInstr.sStats, sName,));

    return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                       % (sTag, sName, self.oReStatsName.pattern));
2754
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result, or the errorComment() result
    when the tag was given a value.
    """
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        _ = (sTag, iEndLine);
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2767
2768 ## @}
2769
2770
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, an old style 'Opcode ...' first line is
    handed to parseCommentOldOpcode(), and every '@tag' found is dispatched
    to its handler in self.dTagHandlers together with the text that follows
    it (grouped into blank-line separated sections).

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    therefore skipped, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each dropped leading line advances iCommentLine so that it keeps
    # referring to the first line that is retained.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # Note: asLines cannot be empty here, the check at the top guarantees at
    #       least one line containing 'Opcode' or '@'.
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;           # Number of tags dispatched to a handler.
    sFlatDefault = None;        # Flattened untagged text preceding the first tag.
    sCurTag      = '@default';  # Pseudo tag collecting the untagged text.
    iCurTagLine  = 0;           # Index into asLines of the current tag.
    asCurSection = [];          # Section being filled; always the last entry of aasSections.
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # A blank line ends a non-empty section and opens a new one.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section left behind by a blank line.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag name is lowercased; text following it on the same line
            # becomes the first line of the tag's first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # Note: Unlike the processing inside the loop, no 'Did you mean'
    #       suggestions are produced here.  iLine is the last line index.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2871
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, text is pulled in line by line from self.asLines,
    starting at self.iLine (self.iLine itself is left unchanged).

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  On an invalid macro name or when running off
    the end of the file the self.error() result is returned instead.
    """
    # The macro name is everything up to the opening parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Scan the arguments one character at a time.  Only commas and closing
    # parentheses at nesting depth one terminate an argument.
    iNextLine   = self.iLine;
    offCur      = offParen + 1;
    offArgStart = offCur;
    cNesting    = 1;
    while cNesting > 0:
        # Ran out of text?  Append the next source line.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chCur == '(':
            cNesting += 1;
        elif chCur in (',', ')'):
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asResult);
2913
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is directly followed (ignoring whitespace) by an
    opening parenthesis.

    The parenthesis check is what keeps a short macro name from matching a
    longer one that merely starts with it.

    Returns (len(sCode), None) if not found, parseMacroInvocation result
    (with the offset made relative to sCode) if found.  Should
    parseMacroInvocation report an error instead of returning a tuple, the
    invocation is treated as not found.
    """
    offHit = sCode.find(sMacro);
    # startswith() copes with nothing at all following the macro name, where
    # indexing the first character of the stripped remainder would raise.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        oRet = self.parseMacroInvocation(sCode[offHit:]);
        # parseMacroInvocation returns the self.error() result on bad input;
        # don't blow up trying to unpack that.
        if isinstance(oRet, tuple):
            offAfter, asRet = oRet;
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
2923
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2929
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    that findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazy generator, so no further macro names are tried after a hit.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oAttempts if asRet is not None), None);
2939
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what the preceding @op*
    tags set on the current (last) instruction, and fill in whatever is not
    set yet.  All problems are reported via self.error(); processing
    continues after each of them.

    sMacro      - The macro name, only used in error messages.
    sStats      - The a_Stats argument (statistics name).
    sAsm        - The a_szMnemonic argument; currently unused.
    sForm       - The a_Form argument, a key in g_kdIemForms.
    sUpper      - The a_Upper argument (upper case mnemonic).
    sLower      - The a_Lower argument (lower case mnemonic).
    sDisHints   - The a_fDisHints argument, '|' separated DISOPTYPE_XXX or '0'.
    sIemHints   - The a_fIemHints argument, '|' separated IEMOPHINT_XXX or '0'.
    asOperands  - List of the a_OpN arguments, keys in g_kdOpTypes.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # iLineMnemonicMacro is -1 until a mnemonic macro has been seen.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry [1] of a g_kdOpTypes value is the operand location ('where').
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Entry [0] of a g_kdIemForms value is the encoding, entry [1] the
        # expected operand locations (or None).
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The prefix is stripped and the rest lowercased to get the g_kdHints key.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));


    _ = sAsm;
    return True;
3057
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are composed here from the lower case mnemonic and the operands
    before handing everything to workerIemOpMnemonicEx.

    Returns the workerIemOpMnemonicEx result.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the bare mnemonic serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3067
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Two groups of macros are recognized: the FNIEMOP_XXX decoder function
    definition/stub macros, and the IEMOP_MNEMONIC family.

    Returns True if a FNIEMOP_XXX macro was found and processed, False in
    all other cases (including when an IEMOP_MNEMONIC* macro was handled).
    """
    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            # asArgs[0] is the macro name, asArgs[1] its first argument.
            sFunction = asArgs[1];

            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', sFunction);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # A stub macro is all there is to the instruction, so complete it now.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions();
            return True;

        # Note: The searches below also hit longer macro names starting with
        #       the one searched for; findAndParseMacroInvocationEx only
        #       accepts a hit that is directly followed by '('.

        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            if len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                # NOTE(review): block indentation was lost in this copy of the
                # file; this call is assumed to sit at the level of the sStats
                # check rather than inside it - confirm against the repository.
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # In the calls below the operand arguments sit between a_Lower and
        # a_fDisHints, so the hint indexes shift by one per operand.

        # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7],
                                       []);
        # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8],
                                       [asArgs[6],]);
        # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9],
                                       [asArgs[6], asArgs[7]]);
        # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                       asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
        # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
        #                   a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                       asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

        # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
        # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
        # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                     [asArgs[4], asArgs[5],]);
        # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                     [asArgs[4], asArgs[5], asArgs[6],]);
        # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                     [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

    return False;
3161
3162
def parse(self):
    """
    Parses the given file.

    Walks self.asLines, switching between the code state (kiCode) and the
    multi-line comment state (kiCommentMulti).  Completed comments go to
    parseComment(), code fragments to checkCodeForMacro().

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        # From here on self.iLine is the one-based number of the current line.
        self.iLine  += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Do the full scan unless the first slash starts a '//' comment
            # while we are in the code state.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Check the code preceding the comment, then start collecting it.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of comment: parse it and continue with the rest of the line.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Flush whatever is still pending at the end of the file.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3229
3230
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  Exceptions from
    the parser propagate; a ParserException is printed first.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3261
3262
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is looked up as a statistics name first and as a function
    name second.  The tests of every instruction found are appended to the
    referencing instruction, except when the reference resolves to the
    referencing instruction itself.

    Returns the number of errors, after writing them to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or []):
            # A statistics name identifies one instruction, a function name
            # yields a list of them.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3290
3291
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (there is nothing that can fail here).
    """
    if g_aoOnlyTestInstructions:
        for oInstr in g_aoAllInstructions:
            if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
                oInstr.aoTests = [];
    return 0;
3303
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are looked up in the directory containing this script.  Once
    they are all parsed, the \\@opcopytests jobs are executed and the
    only-test filter is applied.

    Returns True.  Exits the process with status 1 if any errors were counted.
    Raises exception on failure.
    """
    # (default instruction map, file name) pairs, in parsing order.
    aasInputs = [
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    ];

    sScriptDir = os.path.dirname(os.path.abspath(__file__));
    cErrors    = 0;
    for sDefaultMap, sName in aasInputs:
        cErrors += __parseFileByName(os.path.join(sScriptDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors == 0:
        return True;

    #raise Exception('%d parse errors' % (cErrors,));
    sys.exit(1);
    return True;
3330
3331
3332
# Runs whenever this module is loaded, also when it is merely imported: all
# the instruction files are parsed right away, and the process exits with
# status 1 if any errors were counted.
__parseAll();
3334
3335
3336#
3337# Generators (may perhaps move later).
3338#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are visited in the order of
    their encoding plus lead opcode bytes, and a 'const DISOPCODE xxx[]'
    table is written to oDstFile.  For now only the first map is written
    (see the break at the end of the loop).

    oDstFile    - Where to write the output, anything with a write() method.
                  Defaults to standard output.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Output column at which each field of a table entry should start.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start each group of 16 entries with a comment giving the high
            # nibble, separating the groups with a blank line.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # More than three operands requires the OPVEX macro.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks the first decoder column, so the number of
                # decoder columns emitted so far is len(asColumns) - iStart.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns up to cMaxOperands.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the operand columns up to cMaxOperands.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints whose define starts with DISOPTYPE_ are included.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Pad up to the column offset; if the line is already past
                # it, separate the fields with a single space instead.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                #    DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                #     { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3502
# When run as a script, write the disassembler tables to standard output.
if __name__ == '__main__':
    generateDisassemblerTables();
3505
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette