VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 67007

Last change on this file since 67007 was 67007, checked in by vboxsync, 8 years ago

IEM: Tested and adjusted movq Pq,Qq (0x0f 0x6f).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 146.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 67007 2017-05-22 11:52:13Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 67007 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type; alias it to 'int' so code written
# against the Python 2 name keeps working.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## Constants and values for EFLAGS (constant name -> bit mask).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps a mnemonic to a g_kdX86EFlagsConstants key; a leading '!' marks the
## mnemonic as meaning "flag clear".
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':      0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':      0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':      0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':      0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':      0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':      0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':      0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':      0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':      0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':      0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':      0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':         0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':         0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':          0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':         0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':         0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':         0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':         0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':         0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':      0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':        0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':        0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':       0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':     0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':        0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':        0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':         0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         0x00000001,
    'XSAVE_C_SSE':         0x00000002,
    'XSAVE_C_YMM':         0x00000004,
    'XSAVE_C_BNDREGS':     0x00000008,
    'XSAVE_C_BNDCSR':      0x00000010,
    'XSAVE_C_OPMASK':      0x00000020,
    'XSAVE_C_ZMM_HI256':   0x00000040,
    'XSAVE_C_ZMM_16HI':    0x00000080,
    'XSAVE_C_PKRU':        0x00000200,
    'XSAVE_C_LWP':         0x4000000000000000,
    'XSAVE_C_X':           0x8000000000000000,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
200
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      ),

    # VEX.vvvv
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};

# IDX_ParseFixedReg
# IDX_ParseVexDest
330
331
## IEMFORM_XXX mappings.
## Each value is ( sEncoding, [ sWhere1, ... ] ): the encoding name followed by
## the operand locations in operand order (None for the fixed form).
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
365
## \@oppfx values.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
373
## Special \@opcode tag values.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
383
## Special \@opcodesub tag values.
## Aliases map to the same single-element list content as their canonical name.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
    'rex.w=0':      [ 'rex.w=0',    ],
    'w=0':          [ 'rex.w=0',    ], ##< alias
    'rex.w=1':      [ 'rex.w=1',    ],
    'w=1':          [ 'rex.w=1',    ], ##< alias
};
396
## Valid values for \@openc
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM',     ], ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED',     ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None,                  ], ##< Prefix
};
405
## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
};
415
## Known CPU names (keys only; the values are empty tuples).
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
423
## \@opcpuid
## Maps the feature name used in the tag to a X86_CPUID_XXX constant name.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the next entry maps 'pclmul' to a ..._DTES64 constant, i.e.
    #               a differently named feature - confirm against x86.h.
    'pclmul':       'X86_CPUID_FEATURE_ECX_DTES64',
    # NOTE(review): 'monitor' maps to a ..._CPLDS constant - confirm against x86.h.
    'monitor':      'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    # NOTE(review): 'smx' maps to a ..._TM2 constant - confirm against x86.h.
    'smx':          'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
468
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name; hints without a
## disassembler counterpart map to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':        '',                                ##< Ignores operand size prefix 66h.
    'ignores_rexw':          '',                                ##< Ignores REX.W.
    'ignores_op_sizes':      '',                                ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (presumably;
                                                                ##  the earlier wording referred to itself - confirm).
    'ignores_vex_l':         '',                                ##< Ignores VEX.L.
    'vex_l_zero':            '',                                ##< VEX.L must be 0.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
507
## \@opxcpttype values (see SDMv2 2.4, 2.7).
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '5LZ':      [], # LZ = VEX.L must be zero.
    '6':        [],
    '7':        [],
    '7LZ':      [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
543
544
545def _isValidOpcodeByte(sOpcode):
546 """
547 Checks if sOpcode is a valid lower case opcode byte.
548 Returns true/false.
549 """
550 if len(sOpcode) == 4:
551 if sOpcode[:2] == '0x':
552 if sOpcode[2] in '0123456789abcdef':
553 if sOpcode[3] in '0123456789abcdef':
554 return True;
555 return False;
556
557
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): the comments below say 'vvvvv'; the VEX/XOP map select
    ##               field is usually called 'mmmmm' - confirm the wording.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the optional list of lead opcode
        bytes (each like '0x0f'), sSelector a kdSelectors key, sEncoding a
        kdEncodings key and sDisParse an optional 'IDX_Parse...' handler name.
        The arguments are checked with assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list  (of Instruction objects)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        map's selector.  For the selectors that fix modrm.mod, the two top
        bits of the byte (mask 0xc0) are asserted to match.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note: modrm.mod == 0y11 means the masked value is 0xc0 (not 0xc).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions (those without an opcode are skipped).
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: promote to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two-digit hex words are appended with a leading
        underscore, other words are appended capitalized.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':    # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':  # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;


    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
703
704
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(iValue):
        """
        Returns the bare lower case hex digits of the non-negative iValue,
        i.e. without the '0x' prefix and without the 'L' suffix that Python 2
        appends for long integers.
        """
        sHex = hex(iValue);
        if sHex.endswith('L'):
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        return sHex[2:];

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  (This base
        class only produces the first form.)  The bytearray is in little
        endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used rather than long() so
        # this works on both Python 2 (auto-promotes) and Python 3.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            # -iValue - 1 == ~iValue; inverting each digit gives two's complement.
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            # Make sure the top bit of the leading digit is set.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Find the smallest natural size that fits; values larger than the
        # biggest size are rounded up to a multiple of the biggest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
829
830
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics meaning "flag clear" (the '!' entries in g_kdEFlagsMnemonics).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Returns a (clear, set) pair of TestType.get() entries when any
        "flag clear" mnemonic is present, otherwise just the set value.
        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any "flag clear" mnemonic.

        Whole comma separated tokens are compared; a plain substring search
        would wrongly match e.g. 'pl' inside 'iopl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
866
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a dictionary of
    constants (e.g. CR0).

    Values are comma separated flag names; each is upper-cased, prefixed with
    sConstantPrefix and looked up in kdConstantsAndValues.
    """

    # NOTE(review): not referenced by any method of this class in the visible
    #               code; it looks copied from the EFLAGS type - confirm
    #               before removing.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;   ##< Constant name -> value.
        self.sConstantPrefix      = sConstantPrefix;        ##< Prefix added to the flag names before lookup.

    def get(self, sValue):
        """
        ORs together the values of the comma separated flags in sValue and
        returns the TestType.get() representation of the result.

        Raises BadValue on unknown flags.
        """
        fValue = 0;
        for sFlag in sValue.split(','):
            fFlagValue = self.kdConstantsAndValues.get(self.sConstantPrefix + sFlag.upper(), None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fValue |= fFlagValue;
        return TestType.get(self, '0x%x' % (fValue,));
887
888
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought of as a value used to modify BS3REGCTX and the
    extended (needs to be structured) state.

    An instance records the field being modified (a key of kdFields), the
    assignment operator (one of kasOperators), the value string and the
    name of the value type (presumably a key of kdTypes; the constructor
    only checks that it is a string).
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Legacy misspelling of 'xmm15.dw0' (underscore instead of dot), kept
        # so specifications written against the old name remain valid.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Stores the modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all
        four arguments must be strings.  These constraints are only checked
        by assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1154
1155
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triple.  The
    variable must be a key of kdVariables, the operator one of kasCompareOps
    and the value one of the keys listed for that variable.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## Each one translates into a variable expression.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the selector after asserting that variable, operator and
        value are all known.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1252
1253
1254class InstructionTest(object):
1255 """
1256 Instruction test.
1257 """
1258
1259 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1260 self.oInstr = oInstr; # type: InstructionTest
1261 self.aoInputs = []; # type: list(TestInOut)
1262 self.aoOutputs = []; # type: list(TestInOut)
1263 self.aoSelectors = []; # type: list(TestSelector)
1264
1265 def toString(self, fRepr = False):
1266 """
1267 Converts it to string representation.
1268 """
1269 asWords = [];
1270 if self.aoSelectors:
1271 for oSelector in self.aoSelectors:
1272 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1273 asWords.append('/');
1274
1275 for oModifier in self.aoInputs:
1276 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1277
1278 asWords.append('->');
1279
1280 for oModifier in self.aoOutputs:
1281 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1282
1283 if fRepr:
1284 return '<' + ' '.join(asWords) + '>';
1285 return ' '.join(asWords);
1286
1287 def __str__(self):
1288 """ Provide string represenation. """
1289 return self.toString(False);
1290
1291 def __repr__(self):
1292 """ Provide unambigious string representation. """
1293 return self.toString(True);
1294
class Operand(object):
    """
    Instruction operand.

    Consists of a location (a g_kdOpLocations key) and a type (a
    g_kdOpTypes key); both are checked by assertions on construction.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the type starts with 'E', 'G' or 'M', i.e. uses some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1309
1310
1311
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds everything collected about one instruction: the core attributes
    (mnemonic, operands, maps, opcode, flags, tests, ...), the
    implementation attributes (statistics and function names, stub
    indicators), where it was found in the source and some raw intermediate
    inputs.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile is the source file the instruction was found in and iLine
        the line number it was created at.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'name=value' items separated by '; '.  Items whose value is
        None are left out, and several items are only included when they
        are set/non-empty.  With fRepr set the result is wrapped in '<' and
        '>'.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # More than one character means at least one item has been
                # added (the optional '<' alone is a single character).
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms are a full hex byte ('0x6f'), '/r', '11/r' and
        '!11/r', where r is a single digit that ends up in bits 3 thru 5 of
        the returned value.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form (two characters: the slash and the digit):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics and
        g_kdX86EFlagsConstants.  None or an empty list yields zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1494
1495
1496
## All the instructions.
## SimpleParser.addInstruction appends every Instruction it creates here.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## SimpleParser.doneInstructionOne keeps the first instruction registered
## under a name and reports any later one with the same name as an error.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Multiple instructions per function are allowed, so each value is a list.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
## (presumably filled in by the @oponly / @oponlytest tag handler; that code
## is outside this part of the file).
g_aoOnlyTestInstructions = []; # type: list(Instruction)
1508
## Instruction maps.
## Indexed by map name; SimpleParser looks up its default map here.
## Note: grp1_80 uses the '/r' selector just like grp1_81, grp1_82 and grp1_83.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1569
1570
1571
class ParserException(Exception):
    """ Parser exception carrying a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1576
1577
1578class SimpleParser(object):
1579 """
1580 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1581 """
1582
1583 ## @name Parser state.
1584 ## @{
1585 kiCode = 0;
1586 kiCommentMulti = 1;
1587 ## @}
1588
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse
    and sDefaultMap the name of the g_dInstructionMaps entry to use as the
    default map (asserted to exist).
    """
    # Input and parser state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # The default instruction map.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Counters.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Regular expressions for validating names.
    self.oReMacroName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name to handler method table.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # The numbered test tags, in both the '@optestN' and '@optest[N]' spellings.
    for iTest in range(48):
        for sTagFormat in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFormat % (iTest,)] = self.parseTagOpTestNum;

    # Errors collected by error() and errorComment().
    self.asErrors = [];
1655
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file
    name and the current line number.
    """
    tLocationAndMessage = (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException("%s:%d: error: %s" % tLocationAndMessage);
1661
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine plus iLineInComment.
    """
    iReportedLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iReportedLine, sMessage,));
1667
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1675
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error in self.asErrors for the line found by adding
    iLineInComment to self.iCommentLine.
    Always returns False.
    """
    iReportedLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iReportedLine, sMessage,));
    return False;
1683
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1692
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1699
1700
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and registers it.

    The instruction is created for line iLine, or for the current line when
    iLine is None, and appended to both g_aoAllInstructions and
    self.aoCurInstrs.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1709
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when the instruction already has a mnemonic.  Otherwise
    the lowercased first '_' separated word becomes the mnemonic and, if the
    instruction has no operands yet, the remaining words are added as
    operands using g_kdOpTypes.

    Returns False on the first word that is not a known operand type
    (operands added before it are kept, no error is recorded), otherwise
    True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1725
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line of oInstr, fills in whichever of
    the mnemonic, disassembler enum, stats name, function name, map list
    and encoding have not been set yet, and registers the instruction
    with its maps and with the g_dAllInstructionsByStat and
    g_dAllInstructionsByFunction dictionaries.

    The instruction must not have been completed before (asserted).
    A stats name that is already registered is reported via self.error().

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    # (Placeholder: nothing is done for tagged instructions here yet.)
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    # (Placeholder: nothing is done for untagged instructions here yet.)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    # The order of the steps below matters: each one may consume values
    # that an earlier one has just derived.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The function name, minus its 'iemOp_' prefix, serves as fallback input.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    # An explicit function name takes precedence over the mnemonic.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types.
    # Here the mnemonic takes precedence and the stats name is the fallback.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    # Note that no encoding is set when there are operands but the first
    # one does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
               or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction registered under a stats name is kept; later
    # ones are only reported as duplicates.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1831
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the per-instruction state.

    Each current instruction is handed to doneInstructionOne together with
    its completion line: self.iLine when iLineInComment is None, otherwise
    self.iCommentLine + iLineInComment.  The stub and instruction totals
    are updated, and the pending comment and current instruction list are
    cleared.

    Returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iLineDone);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for whatever comes next.
    self.aoCurInstrs = [];
    self.sComment    = '';
    return True;
1846
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is True, an instruction whose attribute currently
    holds anything other than None is left untouched.
    """
    for oCurInstr in self.aoCurInstrs:
        # The current value is only looked at when we may not overwrite.
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
1858
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    Arrays that are too short are padded with None entries first.  Unless
    fOverwrite is True, only entries currently holding None are replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);

        # Grow the array so that iEntry is a valid index.
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);

        if aoEntries[iEntry] is None or fOverwrite is True:
            aoEntries[iEntry] = oValue;
1870
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by
    a hexadecimal byte, everything after the 'Opcode' keyword is stored
    as the sRawOldOpcodes attribute of the current instructions (via
    setInstrunctionAttrib, so existing values are not overwritten).  An
    upper case '0X' prefix is normalized to '0x'.

    asLines is the list of comment lines; an empty list is ignored.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the 'Opcode' keyword and keep the opcode bytes and what follows.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1886
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is an instruction for the op-tag being parsed.

    An instruction is added (at self.iCommentLine + iTagLine) when there
    is no current one.  The tag count of every current instruction is
    incremented, and instructions receiving their first tag are added to
    the tagged total.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        cTags = oCurInstr.cOpTags + 1;
        oCurInstr.cOpTags = cTags;
        if cTags == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1896
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into single strings.

    Each non-empty section (a list of lines) becomes one string consisting
    of its stripped lines joined by a single space.  Empty sections are
    dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join(sLine.strip() for sLine in asLines)
            for asLines in aasSections
            if asLines];
1908
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into one string.

    The stripped lines of a section are joined using sLineSep.  Every
    non-empty section other than the one at index zero is preceded by
    sSectionSep.  Empty sections contribute nothing, and there is no
    trailing section separator.

    Returns the flattened string.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join(sLine.strip() for sLine in asLines));
    return ''.join(asPieces);
1926
1927
1928
1929 ## @name Tag parsers
1930 ## @{
1931
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from \@opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing full stop is added when missing.  The validated text is
    stored in the sBrief attribute of the instruction, which must not
    have been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for a sentence ending dot, i.e. one followed by a space, that
    # isn't the final character.  Dots inside words are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1961
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened (see flattenSections) and appended to the description
    sections of the instruction.

    Returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if not aasSections:
        return True;

    oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
1976
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value is checked against self.oReMnemonic.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    An already set mnemonic is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value, then set it unless already set.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic):
        if oInstr.sMnemonic is None:
            oInstr.sMnemonic = sNewMnemonic;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, oInstr.sMnemonic, sNewMnemonic,));
    return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));
1999
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.
    When omitted, the location associated with the type (second element
    of the g_kdOpTypes entry) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be
    a key of g_kdOpTypes.

    The operand index is taken from the final digit of sTag.  An already
    specified operand is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    # Operands that haven't been specified yet are represented by None.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2049
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not already
    be assigned to the instruction.  A name repeated within the value is
    reported, but does not fail the tag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMaps = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asUnknown = [sMap for sMap in asMaps if sMap not in g_dInstructionMaps];
    if asUnknown:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, asUnknown[0], ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oExisting in oInstr.aoMaps:
        if oExisting.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oExisting.sName));

    for sMap in asMaps:
        oNewMap = g_dInstructionMaps[sMap];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
2084
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    Exactly one value is expected; it is matched case-insensitively.
    'n/a' is stored as None, and a two character value gets '0x'
    prepended before validation.  Values other than 'n/a' must be keys
    of g_kdPrefixes.  An already set prefix is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        # Allow the byte to be given without the '0x' prefix.
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2122
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' part is a single digit in the range 0 thru 7, matching the
    three bit wide reg field of the ModR/M byte.  An already set opcode
    is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # Not a plain byte, so it must be one of the /reg forms: <lead><digit>
        for sLead in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sLead) + 1 \
               and sOpcode.startswith(sLead) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2152
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  A key of g_kdSubOpcodes (the error message lists 11, !11 and none).

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    What gets stored is the first element of the g_kdSubOpcodes entry for
    the value.  An already set sub opcode is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the table.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Refuse to overwrite.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2178
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  An already set
    encoding is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    if     sEncoding not in g_kdEncodings \
       and sEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to overwrite.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sEncoding,));
2205
## EFlags tag to Instruction attribute name.
## Used by parseTagOpEFlags to find the attribute to read and update.
kdOpFlagToAttr = dict([
    ( '@opfltest',   'asFlTest' ),
    ( '@opflmodify', 'asFlModify' ),
    ( '@opflundef',  'asFlUndefined' ),
    ( '@opflset',    'asFlSet' ),
    ( '@opflclear',  'asFlClear' ),
]);
2214
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or
    the single word 'none' (any case) for an empty list.  Surrounding
    whitespace is stripped from entries when that makes them valid.

    The list is stored in the instruction attribute that
    self.kdOpFlagToAttr gives for sTag, which must still be None.

    Returns True on success.  On failure it returns False or the result
    of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlat   = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlat.split(',');
    if len(asFlags) != 1 or asFlags[0].lower() != 'none':
        fOkay = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                # Keep going so that all the bad values get reported.
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fOkay:
            return False;
    else:
        asFlags = [];

    # Set them, refusing to overwrite.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2246
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each value must be a key of g_kdHints.  The single word 'none' (any
    case) means no hints.  Valid hints are added as keys to the dHints
    dictionary of the instruction; a hint that is already present is
    reported, but does not fail the tag.

    Returns True on success, False if any of the values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry with the stripped variant.
                    asHints[iHint] = sHint.strip();
                else:
                    # Keep going so that all the bad values get reported.
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2280
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    Exactly one word is expected.  With more than one, an error is
    reported and the first word is used.  The word must match
    self.oReDisEnum.  An already set enum value is not overwritten.

    Returns True on success.  On failure it returns False or the result
    of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the first (only) word.
    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Refuse to overwrite.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
2309
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    One CPU name, which must be a key of g_kdCpuNames, is expected.  With
    more than one word, an error is reported and the first word is used.
    An already set, different, minimum CPU is not overwritten; the
    attempt is reported instead.

    Returns True on success (also when only reporting an overwrite
    attempt or surplus words), otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be done during validation above, as that would
    # defeat the overwrite check.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2339
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys.
    The single word 'none' (any case) means no flags.  Valid flags are
    appended to the asCpuIds list of the instruction; a flag that is
    already present is reported, but does not fail the tag.

    Returns True on success, False if any of the values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry with the stripped variant.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    # Keep going so that all the bad values get reported.
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2373
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name is expected and it must match
    self.oReGroupName.  An already set group is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to overwrite.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
2399
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, which must be a key of g_kdInvalidStyles, is
    expected.  Only one of these tags may be given per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Set it, refusing to overwrite.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    return True;
2437
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as one test.  Tests that parse
    cleanly are appended to the aoTests list of the instruction; problems
    are reported via self.errorComment() and the test in question is
    dropped.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();

        # The words are processed back to front: outputs first, then the
        # inputs after seeing '->', and finally the selectors after '/'.
        # Note! The list we're currently filling must be identified by
        #       identity ('is'), since two of the lists may well compare
        #       equal (both being empty, for instance).
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand that expands to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition is considered.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value is optionally followed by ':<type>',
                        # with the field providing the default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is considered.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if all went well, otherwise
        # complain some more to help with finding the problem.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2582
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is expected to equal the number of tests the instruction
    already has.  A mismatch is reported, but the test is parsed all the
    same.

    Returns whatever parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the number; the tag name itself is seven characters long.
    if sTag[-1] == ']':
        sNumber = sTag[8:-1];
    else:
        sNumber = sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2598
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed and nothing is stored.

    See also \@oponlytest.

    Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2610
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName
    and is appended to the asCopyTests list of the instruction.  Invalid
    and duplicate references are reported and skipped.

    Returns True, or the result of self.errorComment() when no reference
    is given at all.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asToCopy:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2639
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    there already.  The tag does not take any value.

    See also \@optestignore.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate and add instruction to only test dictionary.
    sValue = self.flattenAllSections(aasSections).strip();
    if not sValue:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        return True;
    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));
2662
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is accepted, it must be a key of g_kdXcptTypes, and the
    type may only be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Set it, refusing to silently replace an earlier value.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
2689
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation following the description, or it is
    generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReFunctionName.pattern));

    # Only the first definition counts; refuse to overwrite it.
    sPrevious = oCurInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sPrevious, sName,));

    oCurInstr.sFunction = sName;
    return True;
2717
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up from
    the IEMOP_MNEMONIC macro invocation, or it is generated from the mnemonic
    and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern));

    # Only the first definition counts; refuse to overwrite it.
    sPrevious = oCurInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sPrevious, sName,));

    oCurInstr.sStats = sName;
    return True;
2745
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the doneInstructions() result, or the errorComment() result if
    the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2758
2759 ## @}
2760
2761
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    The comment is split into lines, leading asterisks and whitespace are
    stripped, and the lines are grouped per '@' tag.  Text belonging to a tag
    is kept as a list of sections (paragraphs separated by blank lines) and is
    handed to the matching handler in self.dTagHandlers.  Text before the
    first tag is collected under the pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    therefore skipped, True once the comment has been processed.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
        and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Keep self.iCommentLine pointing at the first non-empty comment line.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags = 0;                        # Number of tags that were dispatched to a handler.
    sFlatDefault = None;                # Flattened untagged text, if any.
    sCurTag = '@default';               # Tag currently being collected.
    iCurTagLine = 0;                    # Index into asLines where the current tag started.
    asCurSection = [];                  # Lines of the section (paragraph) being collected.
    aasSections = [ asCurSection, ];    # All sections of the current tag.
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag: a blank line starts a new section.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section left behind by a blank line.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            # NOTE(review): '@opencoding' starts with '@op', so it is caught by the
            # 'Unknown tag' branch above and only '@encoding' can get here.
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag name is the first word (lower-cased); the rest of the line,
            # if any, becomes the first line of the tag's first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop variant this does not issue the
    # 'Did you mean ...' hints for the last tag.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2862
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, the following lines from self.asLines (starting at
    self.iLine) are appended until it is; self.iLine itself is not changed.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    On a malformed invocation (bad macro name, or running off the end of the
    file) the problem is recorded with self.error() and the second element is
    None, so callers unpacking the tuple see it as 'no invocation found'.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine = self.iLine;
    cDepth = 1;             # Parenthesis nesting depth; 1 = directly inside the macro's own.
    off = offOpen + 1;
    offStart = off;         # Start offset of the argument currently being scanned.
    while cDepth > 0:
        if off >= len(sInvocation):
            # Invocation continues on the next source line.
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only separators at the outermost level terminate an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2904
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a macro
    invocation if it is followed (ignoring whitespace) by an open parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # startswith() rather than indexing [0]: nothing but whitespace may follow
    # the macro name, in which case there is no first character to look at.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2914
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs = tResult[1];
    return asArgs;
2920
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one that
    is invoked in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazy generator: no further names are tried after the first hit.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sCandidate) for sCandidate in asMacro);
    return next((asArgs for asArgs in oAttempts if asArgs is not None), None);
2930
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what has already been
    recorded for the last instruction in self.aoCurInstrs (an instruction is
    added if there is none), and any mnemonic, operands, encoding, statistics
    name and hints not yet set are filled in from the macro.  Mismatches are
    reported through self.error() and processing continues.

    sMacro is the macro name (used in messages), asOperands the list of a_OpN
    values, and the remaining arguments are the correspondingly named macro
    parameters.  sAsm is currently unused.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # iLineMnemonicMacro == -1 means no IEMOP_MNEMONIC* macro has been seen yet.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Element 1 of the g_kdOpTypes entry is used as the operand location.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Element 0 of the g_kdIemForms entry is used as the encoding name.
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        # Element 1, when not None, lists the expected location of each operand.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Each hint list is a '|' separated expression; a literal '0' means no hints.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));


    _ = sAsm;
    return True;
3048
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before the
    work is handed to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the bare mnemonic serves as both names.
        sStats = sLower;
        sAsm = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3058
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    sCode is a piece of a source line outside of comments.  Function
    definition macros (FNIEMOP_*) set the function name and stub flags on the
    current instructions; the IEMOP_MNEMONIC* macros are forwarded to the
    worker methods with their arguments rearranged.

    Returns True if a FNIEMOP_* definition macro was processed, False
    otherwise (also when IEMOP_MNEMONIC* macros were processed).
    """
    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            # asArgs[0] is the macro name, asArgs[1] its first argument.
            sFunction = asArgs[1];

            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                # iLineFnIemOpMacro == -1 means no FNIEMOP_XXX macro has been seen yet.
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', sFunction);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # Stub macros complete the instruction(s) right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions();
            return True;

        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            if len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # In the calls below asArgs[0] is the macro name; the operand arguments
        # are collected into the trailing list and the two hint arguments that
        # follow them in the macro are passed ahead of that list.

        # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7],
                                       []);
        # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8],
                                       [asArgs[6],]);
        # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9],
                                       [asArgs[6], asArgs[7]]);
        # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                       asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
        # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
        #                   a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                       asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

        # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
        # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
        # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                     [asArgs[4], asArgs[5],]);
        # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                     [asArgs[4], asArgs[5], asArgs[6],]);
        # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                     [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

    return False;
3152
3153
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine, switching between the code state
    (self.kiCode) and the multi-line comment state (self.kiCommentMulti).
    Code fragments go to checkCodeForMacro(), completed comments to
    parseComment().

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Run the state machine over the line unless the first slash starts
            # a '//' comment while we are in the code state.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then begin collecting the comment.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # Comment complete: switch back to code and parse it.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            # (Only the code in front of it is checked; a line starting with
            # the comment is ignored altogether.)
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Flush whatever is still pending at the end of the file.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3220
3221
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read; a ParserException
    from the parser is printed and re-raised.
    """
    #
    # Slurp the whole file into a list of lines.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3252
3253
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Every reference is looked up by statistics name first and by function
    name second, and the tests of the instructions found are appended to the
    referring instruction.  Unresolved and self references are written to
    stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;
        for sRef in oTarget.asCopyTests:
            # A statistics name identifies one instruction, a function name a list.
            oByStat = g_dAllInstructionsByStat.get(sRef, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sRef, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                    continue;
                asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3281
3282
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        # Only instructions that have tests and are not singled out get cleared.
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
3294
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files located next to this script,
    then executes the test copying and the only-test filtering.

    Returns True on success; exits the process with status 1 if any errors
    were counted.
    Raises exception on failure.
    """
    # (default instruction map, file name) pairs, in parse order.
    atInputs = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    );
    sHere = os.path.dirname(os.path.abspath(__file__));

    cProblems = 0;
    for sMapName, sFileName in atInputs:
        cProblems += __parseFileByName(os.path.join(sHere, sFileName), sMapName);
    cProblems += __doTestCopying();
    cProblems += __applyOnlyTest();

    if cProblems != 0:
        #raise Exception('%d parse errors' % (cProblems,));
        sys.exit(1);
    return True;
3321
3322
3323
# Module-level call: the instruction files are parsed whenever this module is
# loaded, whether it is imported or run as a script.
__parseAll();
3325
3326
3327#
3328# Generators (may perhaps move later).
3329#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are visited ordered by encoding
    and lead opcodes.  For a map, a C array 'const DISOPCODE <name>[]' is
    built with one OP()/OPVEX() entry per instruction, followed by an
    AssertCompile on the table size, and written to oDstFile.

    oDstFile is any object with a write() method taking a string; the default
    is the sys.stdout object as it was when this function was defined.

    Note! Only the first map is currently emitted, see the break at the end
          of the loop.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Target text column for each of the macro arguments when formatting an entry.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Separator comment in front of every group of 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # More than three operands requires the OPVEX macro, which takes four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                # Mnemonic followed by the operand format strings (element 2 of the
                # g_kdOpTypes entry); those not starting with '%' are upper-cased.
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the parser index columns begin so they can be
                # counted and padded up to cMaxOperands below.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (Operands already covered by a parser column above are skipped.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # OR together the DISOPTYPE_ defines of the hints; '0' if there are none.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Pad up to the column offset, or use a single space if already past it.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3493
# When run as a script, write the disassembler tables to standard output.
if __name__ == '__main__':
    generateDisassemblerTables();
3496
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette