VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 67004

Last change on this file since 67004 was 67004, checked in by vboxsync, 8 years ago

IEM: movq Vq,Eq & movd Vd,Ed docs+tests+fixes.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 146.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 67004 2017-05-22 10:20:28Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 67004 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 compatibility: there is no separate 'long' type any more, so alias
# it to 'int' for the code below that converts values using long().
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## Constants and values for EFLAGS, expressed as bit positions.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps each accepted flag mnemonic to the name of a g_kdX86EFlagsConstants
## entry.  A leading '!' on the constant name marks the mnemonic as the
## "flag clear" notation for that flag.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0, expressed as bit positions.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           1 << 0,
    'X86_CR0_MP':           1 << 1,
    'X86_CR0_EM':           1 << 2,
    'X86_CR0_TS':           1 << 3,
    'X86_CR0_ET':           1 << 4,
    'X86_CR0_NE':           1 << 5,
    'X86_CR0_WP':           1 << 16,
    'X86_CR0_AM':           1 << 18,
    'X86_CR0_NW':           1 << 29,
    'X86_CR0_CD':           1 << 30,
    'X86_CR0_PG':           1 << 31,
};
139
## Constants and values for CR4, expressed as bit positions.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
161
## XSAVE components (XCR0), expressed as bit positions.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
## Every location name gets its own (initially empty) list.
g_kdOpLocations = dict((sLocation, []) for sLocation in (
    'reg',      # modrm.reg
    'rm',       # modrm.rm
    'imm',      # immediate instruction data
    'vvvv',     # VEX.vvvv

    # fixed registers.
    'AL',
    'rAX',
    'rSI',
    'rDI',
    'rFLAGS',
    'CS',
    'DS',
    'ES',
    'FS',
    'GS',
    'SS',
));
200
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Keys with a _RO / _WO / _RW suffix share the disassembler fields of the
## unsuffixed type; in this table the suffix only distinguishes the key.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      ),

    # VEX.vvvv
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register as an operand.
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
324
325# IDX_ParseFixedReg
326# IDX_ParseVexDest
327
328
## IEMFORM_XXX mappings.
##
## Value fields:
##    - 0: the encoding name (one of the g_kdEncodings keys).
##    - 1: list of operand locations (g_kdOpLocations keys) in operand order,
##         or None for the FIXED form which lists no locations.
##
## The _REG and _MEM variants carry the same encoding and locations as their
## base form in this table.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
362
## \@oppfx values.
## Every prefix name gets its own (initially empty) list.
g_kdPrefixes = dict((sPrefix, []) for sPrefix in ('none', '0x66', '0xf3', '0xf2'));
370
## Special \@opcode tag values.
## Every special value gets its own (initially empty) list.
g_kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
380
## Special \@opcodesub tag values.
##
## Each value is a single-element list holding the canonical spelling of the
## sub-opcode (None for 'none').  The entries marked 'alias' are shorter
## spellings mapping to the same canonical string.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ],  ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ],  ##< alias
    'rex.w=0':      [ 'rex.w=0',    ],
    'w=0':          [ 'rex.w=0',    ],  ##< alias
    'rex.w=1':      [ 'rex.w=1',    ],
    'w=1':          [ 'rex.w=1',    ],  ##< alias
};
393
## Valid values for \@openc
##
## Each value is a single-element list holding the name of the matching
## BS3CG1ENC_XXX constant, or None where no constant is listed ('prefix').
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
402
## \@opunused, \@opinvalid, \@opinvlstyle
##
## Describes how much of an invalid/unused encoding gets decoded before the
## instruction is rejected.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
};
412
## Known CPU names; each maps to an empty tuple.
g_kdCpuNames = dict.fromkeys(('8086', '80186', '80286', '80386', '80486'), ());
420
## \@opcpuid
##
## Maps the CPUID feature names accepted by \@opcpuid to the name of the
## corresponding X86_CPUID_XXX feature bit constant.
##
## Note! 'pclmul', 'monitor' and 'smx' used to name the constant of a
##       neighbouring, unrelated bit (DTES64, CPLDS and TM2 respectively);
##       they now name their own feature bits.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
465
## \@ophints values.
##
## Maps each hint name to the name of the matching DISOPTYPE_XXX constant.
## Hints mapped to an empty string have no constant listed in this table.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':        '',                                ##< Ignores operand size prefix 66h.
    'ignores_rexw':          '',                                ##< Ignores REX.W.
    'ignores_op_sizes':      '',                                ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'ignores_vex_l':         '',                                ##< Ignores VEX.L.
    'vex_l_zero':            '',                                ##< VEX.L must be 0.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
504
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every exception type name gets its own (initially empty) list.
g_kdXcptTypes = dict((sXcptType, []) for sXcptType in (
    'none',
    '1',
    '2',
    '3',
    '4',
    '4UA',
    '5',
    '5LZ',  # LZ = VEX.L must be zero.
    '6',
    '7',
    '7LZ',
    '8',
    '11',
    '12',
    'E1',
    'E1NF',
    'E2',
    'E3',
    'E3NF',
    'E4',
    'E4NF',
    'E5',
    'E5NF',
    'E6',
    'E6NF',
    'E7NF',
    'E9',
    'E9NF',
    'E10',
    'E11',
    'E12',
    'E12NF',
));
540
541
542def _isValidOpcodeByte(sOpcode):
543 """
544 Checks if sOpcode is a valid lower case opcode byte.
545 Returns true/false.
546 """
547 if len(sOpcode) == 4:
548 if sOpcode[:2] == '0x':
549 if sOpcode[2] in '0123456789abcdef':
550 if sOpcode[3] in '0123456789abcdef':
551 return True;
552 return False;
553
554
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid encodings (sEncoding values).
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 1
        'vex2':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 2
        'vex3':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 3
        'xop8':     [], ##< XOP prefix with mmmmm (map select) = 8
        'xop9':     [], ##< XOP prefix with mmmmm (map select) = 9
        'xop10':    [], ##< XOP prefix with mmmmm (map select) = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [  256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [    8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[   16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [   64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as lower
        case '0xNN' strings (None means no lead bytes), sSelector a
        kdSelectors key, sEncoding a kdEncodings key and sDisParse an optional
        IDX_ParseXXX name.  Invalid arguments trigger assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte(); for all selectors
        but 'byte' that value is treated as a ModR/M byte (mod in bits 7:6,
        reg in bits 5:3, rm in bits 2:0).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            # modrm.reg in bits 2:0, bit 3 set when modrm.mod is 3 (register form).
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! The mod field is the two top bits, so the value to compare the
        #       masked byte against is 0xc0.  (Comparing with 0xc can never match.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions, skipping those without an opcode.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two digit lower case hex words are appended with a
        leading underscore, other words are capitalized ('grp' -> 'Grp' and
        'map' -> 'Map' inside words) and appended directly.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':     # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':   # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                # Slicing rather than indexing so empty words (double underscores) are harmless.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;


    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
700
701
class TestType(object):
    """
    Test value type for integer like values.

    Textual values (decimal or '0x' prefixed hexadecimal, optionally signed)
    are turned into little endian byte arrays padded to one of the sizes in
    acbSizes.  The fUnsigned constructor parameter gives the default choice
    between zero and sign extending; a value with an explicit '+' or '-' is
    always flagged for sign extension.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32];    # Normal sizes.
        self.sName     = sName;
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Raised by get() for values that cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## Maps each lower case hex digit to its bitwise inverse (ascii ~ operator).
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Converts sValue to its shortest normal sized byte representation.

        Returns ((fSignExtend, bytearray), ).  Subclasses may instead return
        ((fSignExtend, bytearray), (fSignExtend, bytearray), ) for AND+OR
        pairs, where the first entry is what to AND with the field and the
        second what to OR with.  The byte arrays are least significant byte
        first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and moves the radix prefix by one.
        fExplicitSign = sValue[0] in ('-', '+');
        fSignExtend   = fExplicitSign or not self.fUnsigned;
        offRadix      = 1 if fExplicitSign else 0;
        fHex          = len(sValue) > offRadix + 2 and sValue[offRadix : offRadix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  Negative values are turned into two's
        # complement digits by inverting the digits of (-n - 1).
        if iValue >= 0:
            sFill = '0';
            sHex  = '%x' % (iValue,);
        else:
            sFill = 'f';
            sHex  = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;      # Make sure the top bit is set.

        # Work out the natural width: the first size it fits in, or the
        # nearest multiple of the largest size if it is too big for all.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad on the left with zeros or, for negative values, 'f's.
        sHex = sHex.rjust(cNaturalDigits, sFill);

        # Convert digit pairs to bytes, starting with the least significant one.
        abValue = bytearray([int(sHex[off : off + 2], 16) for off in range(len(sHex) - 2, -1, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Checks whether get() accepts sValue.
        Returns True if it does, the error message if not.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue yields an AND+OR pair.  Never the case for plain integers.
        """
        _ = sValue;
        return False;
826
827
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics meaning 'flag clear'.  A value using any of these
    ## becomes an AND+OR pair.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns ((fSignExtend, abSet),) if the value only sets flags, or
        ((fSignExtend, abClear), (fSignExtend, abSet)) if it also names flags
        to clear; abClear holds the bits of the flags to be cleared.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue names at least one 'flag clear' mnemonic, i.e. whether
        get() returns an AND+OR pair for it.

        The flags are compared as whole list items.  Searching sValue for
        substrings would misfire on 'iopl', which contains 'pl'.
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
863
class TestTypeFromDict(TestType):
    """
    Value parsing for comma separated flag lists, with the flags and their
    values coming from a dictionary (CR0, CR4, XCR0).

    A flag is looked up by upper casing it and prepending sConstantPrefix.
    """

    kdZeroValueFlags = dict.fromkeys(('nv', 'pl', 'nz', 'na', 'pe', 'nc', 'di', 'up'), 0);

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        ORs together the values of the flags listed in sValue and converts
        the result the same way as TestType.get does.

        Raises BadValue on unknown flags.
        """
        fValue = 0;
        for sFlag in sValue.split(','):
            sConstant  = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fValue |= fFlagValue;
        return TestType.get(self, '0x%x' % (fValue,));
884
885
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance names a field (a key of kdFields), an assignment operator
    (one of kasOperators), the value as a string and the name of the value
    type.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ]
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    }
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # Was misspelled 'xmm15_dw0', unlike xmm0..xmm14.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all four
        arguments must be strings.  Violations trip assertions.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
        assert isinstance(sField, str)
        assert isinstance(sOp, str)
        assert isinstance(sType, str)
        assert isinstance(sValue, str)
1151
1152
class TestSelector(object):
    """
    A single selector of an instruction test.

    A selector compares one variable (a key of kdVariables) against one of
    the values valid for that variable, using one of the kasCompareOps.
    """
    ## The comparison operators a selector may use.
    kasCompareOps = [ '==', '!=' ]

    ## The selector variables, each with its valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    }

    ## Shorthand predicates, each standing for a variable expression.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bit count.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector; the arguments are checked with assertions against
        kdVariables and kasCompareOps.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        assert sValue in self.kdVariables[sVariable]
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue
1249
1250
1251class InstructionTest(object):
1252 """
1253 Instruction test.
1254 """
1255
1256 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1257 self.oInstr = oInstr; # type: InstructionTest
1258 self.aoInputs = []; # type: list(TestInOut)
1259 self.aoOutputs = []; # type: list(TestInOut)
1260 self.aoSelectors = []; # type: list(TestSelector)
1261
1262 def toString(self, fRepr = False):
1263 """
1264 Converts it to string representation.
1265 """
1266 asWords = [];
1267 if self.aoSelectors:
1268 for oSelector in self.aoSelectors:
1269 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1270 asWords.append('/');
1271
1272 for oModifier in self.aoInputs:
1273 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1274
1275 asWords.append('->');
1276
1277 for oModifier in self.aoOutputs:
1278 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1279
1280 if fRepr:
1281 return '<' + ' '.join(asWords) + '>';
1282 return ' '.join(asWords);
1283
1284 def __str__(self):
1285 """ Provide string represenation. """
1286 return self.toString(False);
1287
1288 def __repr__(self):
1289 """ Provide unambigious string representation. """
1290 return self.toString(True);
1291
class Operand(object):
    """
    An operand of an instruction: where it is found and what type it has.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known keys; the offending value is the assertion message.
        assert sWhere in g_kdOpLocations, sWhere
        assert sType  in g_kdOpTypes, sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the type starts with 'E', 'G' or 'M', i.e. some form of ModR/M encoding. """
        chFirst = self.sType[0]
        return chFirst in ('E', 'G', 'M')
1306
1307
1308
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about a single instruction: its core attributes,
    implementation details, where it was found in the source, and some raw
    intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile and iLine are stored as sSrcFile and iLineCreated.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        self.asCopyTests    = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' items; fields that
        are None are left out.  With fRepr set it is wrapped in angle brackets.
        """
        aasFields = []

        aasFields.append(['opcode',    self.sOpcode])
        aasFields.append(['mnemonic',  self.sMnemonic])
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)])
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])])
        aasFields.append(['encoding',  self.sEncoding])
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())])
        aasFields.append(['disenum',   self.sDisEnum])
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)])
        aasFields.append(['group',     self.sGroup])
        if self.fUnused:
            aasFields.append(['unused', 'True'])
        if self.fInvalid:
            aasFields.append(['invalid', 'True'])
        aasFields.append(['invlstyle', self.sInvalidStyle])
        aasFields.append(['fltest',    self.asFlTest])
        aasFields.append(['flmodify',  self.asFlModify])
        aasFields.append(['flundef',   self.asFlUndefined])
        aasFields.append(['flset',     self.asFlSet])
        aasFields.append(['flclear',   self.asFlClear])
        aasFields.append(['mincpu',    self.sMinCpu])
        aasFields.append(['stats',     self.sStats])
        aasFields.append(['sFunction', self.sFunction])
        if self.fStub:
            aasFields.append(['fStub', 'True'])
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True'])
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)])
        if self.iLineFnIemOpMacro  != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)])
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)])

        # Identity comparison: only fields that really are None get skipped.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None])
        if fRepr:
            return '<' + sRet + '>'
        return sRet

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms are '0xNN', '/r', '11/r' and '!11/r', where r is a
        single digit.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sOpcode = str(self.sOpcode)     # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16)

        # The /r form.  It is two characters long ('/' + digit); checking for a
        # length of four here meant that it could never match.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        None and empty lists give 0.  The mnemonics must not map to negated
        ('!' prefixed) constants.
        """
        uRet = 0
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag]
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant]
        return uRet

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear)

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False
        return True
1491
1492
1493
## List of all the instructions.
g_aoAllInstructions          = list()   # type: list(Instruction)

## Statistics name (opstat) -> instruction.
g_dAllInstructionsByStat     = dict()   # type: dict(Instruction)

## Function name (opfunction) -> list of instructions.
g_dAllInstructionsByFunction = dict()   # type: dict(list(Instruction))

## List of the instructions tagged by oponlytest.
g_aoOnlyTestInstructions     = list()   # type: list(Instruction)
1505
## Instruction maps, indexed by map name.
## Each entry repeats its key as the first InstructionMap argument.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Opcode 0x80 is a ModR/M.reg selected group just like 0x81 thru 0x83, so
    # it needs the same '/r' selector (it was missing here).
    'grp1_80':      InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',       asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',       asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',      asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',       asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',       asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',       asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',      asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',      asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',      asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',      asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',      asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',      asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',     asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',       asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38',  asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a',  asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',    sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',   sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',   sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',   sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',   sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',   sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',    sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',    sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',    sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',    sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',   sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
}
1566
1567
1568
class ParserException(Exception):
    """ Exception carrying a parser error message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
1573
1574
1575class SimpleParser(object):
1576 """
1577 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1578 """
1579
1580 ## @name Parser state.
1581 ## @{
1582 kiCode = 0;
1583 kiCommentMulti = 1;
1584 ## @}
1585
1586 def __init__(self, sSrcFile, asLines, sDefaultMap):
1587 self.sSrcFile = sSrcFile;
1588 self.asLines = asLines;
1589 self.iLine = 0;
1590 self.iState = self.kiCode;
1591 self.sComment = '';
1592 self.iCommentLine = 0;
1593 self.aoCurInstrs = [];
1594
1595 assert sDefaultMap in g_dInstructionMaps;
1596 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1597
1598 self.cTotalInstr = 0;
1599 self.cTotalStubs = 0;
1600 self.cTotalTagged = 0;
1601
1602 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1603 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1604 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1605 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1606 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1607 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1608 self.fDebug = True;
1609
1610 self.dTagHandlers = {
1611 '@opbrief': self.parseTagOpBrief,
1612 '@opdesc': self.parseTagOpDesc,
1613 '@opmnemonic': self.parseTagOpMnemonic,
1614 '@op1': self.parseTagOpOperandN,
1615 '@op2': self.parseTagOpOperandN,
1616 '@op3': self.parseTagOpOperandN,
1617 '@op4': self.parseTagOpOperandN,
1618 '@oppfx': self.parseTagOpPfx,
1619 '@opmaps': self.parseTagOpMaps,
1620 '@opcode': self.parseTagOpcode,
1621 '@opcodesub': self.parseTagOpcodeSub,
1622 '@openc': self.parseTagOpEnc,
1623 '@opfltest': self.parseTagOpEFlags,
1624 '@opflmodify': self.parseTagOpEFlags,
1625 '@opflundef': self.parseTagOpEFlags,
1626 '@opflset': self.parseTagOpEFlags,
1627 '@opflclear': self.parseTagOpEFlags,
1628 '@ophints': self.parseTagOpHints,
1629 '@opdisenum': self.parseTagOpDisEnum,
1630 '@opmincpu': self.parseTagOpMinCpu,
1631 '@opcpuid': self.parseTagOpCpuId,
1632 '@opgroup': self.parseTagOpGroup,
1633 '@opunused': self.parseTagOpUnusedInvalid,
1634 '@opinvalid': self.parseTagOpUnusedInvalid,
1635 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1636 '@optest': self.parseTagOpTest,
1637 '@optestign': self.parseTagOpTestIgnore,
1638 '@optestignore': self.parseTagOpTestIgnore,
1639 '@opcopytests': self.parseTagOpCopyTests,
1640 '@oponly': self.parseTagOpOnlyTest,
1641 '@oponlytest': self.parseTagOpOnlyTest,
1642 '@opxcpttype': self.parseTagOpXcptType,
1643 '@opstats': self.parseTagOpStats,
1644 '@opfunction': self.parseTagOpFunction,
1645 '@opdone': self.parseTagOpDone,
1646 };
1647 for i in range(48):
1648 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1649 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1650
1651 self.asErrors = [];
1652
1653 def raiseError(self, sMessage):
1654 """
1655 Raise error prefixed with the source and line number.
1656 """
1657 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1658
1659 def raiseCommentError(self, iLineInComment, sMessage):
1660 """
1661 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1662 """
1663 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1664
1665 def error(self, sMessage):
1666 """
1667 Adds an error.
1668 returns False;
1669 """
1670 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1671 return False;
1672
1673 def errorComment(self, iLineInComment, sMessage):
1674 """
1675 Adds a comment error.
1676 returns False;
1677 """
1678 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1679 return False;
1680
1681 def printErrors(self):
1682 """
1683 Print the errors to stderr.
1684 Returns number of errors.
1685 """
1686 if self.asErrors:
1687 sys.stderr.write(u''.join(self.asErrors));
1688 return len(self.asErrors);
1689
1690 def debug(self, sMessage):
1691 """
1692 For debugging.
1693 """
1694 if self.fDebug:
1695 print('debug: %s' % (sMessage,));
1696
1697
1698 def addInstruction(self, iLine = None):
1699 """
1700 Adds an instruction.
1701 """
1702 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1703 g_aoAllInstructions.append(oInstr);
1704 self.aoCurInstrs.append(oInstr);
1705 return oInstr;
1706
1707 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1708 """
1709 Derives the mnemonic and operands from a IEM stats base name like string.
1710 """
1711 if oInstr.sMnemonic is None:
1712 asWords = sStats.split('_');
1713 oInstr.sMnemonic = asWords[0].lower();
1714 if len(asWords) > 1 and not oInstr.aoOperands:
1715 for sType in asWords[1:]:
1716 if sType in g_kdOpTypes:
1717 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1718 else:
1719 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1720 return False;
1721 return True;
1722
def doneInstructionOne(self, oInstr, iLine):
    """
    Finalizes a single instruction.

    Records iLine as the completion line and then fills in, in this order
    and only where still unset: mnemonic + operands, disassembler enum,
    statistics name, function name, map membership and encoding.  Finally
    the instruction is registered in the by-stat and by-function
    dictionaries (a duplicate stat name is reported via self.error).

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    def typeSuffix():
        """ '_<type>' for each operand having a type, concatenated. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    if oInstr.cOpTags > 0:
        # Specified instructions: placeholder, nothing to do yet.
        if oInstr.sStats is None:
            pass;
    else:
        # Unspecified legacy stuff.  We generally only got a few things to
        # go on here, e.g.:
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        # Placeholder, nothing to do yet.
        pass;

    #
    # Common defaults.
    #

    # Missing mnemonic: guess it (and the operands) from the stats name,
    # falling back on the function name minus its 'iemOp_' prefix.
    if oInstr.sMnemonic is None:
        sSeed = oInstr.sStats;
        if sSeed is None and oInstr.sFunction is not None:
            sSeed = oInstr.sFunction.replace('iemOp_', '');
        if sSeed is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sSeed);

    # Disassembler op enum constant follows from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # IEM statistics base name: function name first, else mnemonic + types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + typeSuffix();

    # IEM function name: mnemonic + types first, else the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + typeSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map, then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        aoOperands = oInstr.aoOperands;
        if not aoOperands:
            sBase = 'ModR/M' if (oInstr.fUnused and oInstr.sSubOpcode) else 'fixed';
            oInstr.sEncoding = ('VEX.' + sBase) if oInstr.onlyInVexMaps() else sBase;
        elif aoOperands[0].usesModRM():
            fVex = (len(aoOperands) >= 2 and aoOperands[1].sWhere == 'vvvv') or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Register under the opstat name; these must be unique.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Register under the function name; several instructions may share one.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1828
def doneInstructions(self, iLineInComment = None):
    """
    Completes all current instructions and resets the current state.

    Each instruction is passed to doneInstructionOne together with its
    completion line: self.iLine when iLineInComment is None, otherwise
    self.iCommentLine + iLineInComment.  The stub and total counters are
    updated, then the pending comment and instruction list are cleared.

    Returns True.
    """
    aoDone = self.aoCurInstrs;
    for oInstr in aoDone:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(aoDone);

    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
1843
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to attribute sAttrib on every current instruction.

    Unless fOverwrite is exactly True, an instruction is only updated when
    its current value is None; any other existing value (including an
    empty string) is left alone.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1855
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib on every
    current instruction, padding the list with None as needed.

    Unless fOverwrite is exactly True, an entry is only updated when it
    currently holds None.
    """
    for oInstr in self.aoCurInstrs:
        aoEntries = getattr(oInstr, sAttrib);
        cMissing  = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
1867
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by
    a hex byte, everything after 'Opcode' is stored as the sRawOldOpcodes
    attribute of the current instructions, with any '0X' prefix normalized
    to '0x'.  Existing sRawOldOpcodes values are not overwritten.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' keyword and keep the opcode bytes and
        # /reg parts, lower-casing the hex prefix.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord
                     for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1883
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    If there is none, one is added at self.iCommentLine + iTagLine.  The
    tag count of every current instruction is bumped, and instructions
    receiving their first tag are counted in self.cTotalTagged.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    aoInstrs = self.aoCurInstrs;
    for oInstr in aoInstrs:
        oInstr.cOpTags += 1;
        if oInstr.cOpTags == 1:
            self.cTotalTagged += 1;
    return aoInstrs[-1];
1893
@staticmethod
def flattenSections(aasSections):
    """
    Turns each non-empty section (a list of lines) into one string made of
    its stripped lines joined by single spaces.

    Returns a list of strings, one per non-empty section.
    """
    return [' '.join([sLine.strip() for sLine in asLines])
            for asLines in aasSections
            if asLines];
1905
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    The lines of each section are stripped and joined with sLineSep, and
    the non-empty sections are then joined with sSectionSep (newline by
    default).  Empty sections are skipped entirely, so the result never
    starts or ends with a section separator.

    Returns the flattened string, which is empty when there is no content.
    """
    # Joining only the non-empty sections (rather than testing the section
    # index) is what keeps a leading empty section from emitting a stray
    # separator at the start of the result.
    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections
                             if asLines]);
1923
1924
1925
1926 ## @name Tag parsers
1927 ## @{
1928
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be non-empty, at most 180 characters and a single
    sentence; a trailing full stop is added when missing.  On success it
    is stored in the instruction's sBrief attribute, which must not have
    been set already.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first full stop that ends a sentence, i.e. one followed by a
    # space or sitting at the very end.  Dots inside tokens are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1958
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  Each non-empty section is flattened into
    one string and appended to the instruction's asDescSections list.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
1973
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of
    prefixes, groups and whatnot, there are times when the value isn't
    that of an actual assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment
    (invalid name, or the instruction already has a mnemonic).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then check it and the current state.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,);
        return self.errorComment(iTagLine, sError);

    if oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sNewMnemonic,);
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sNewMnemonic;
    return True;
1996
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It is validated against g_kdOpLocations
    and, when omitted, taken from the g_kdOpTypes entry for the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals and are
    validated against g_kdOpTypes.

    The operand index comes from the last character of the tag.  An
    already specified operand is never overwritten.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # This reports the operand *type*; the location check is below.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Slots for
    # lower numbered operands not yet seen are padded with None.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2046
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not name a map
    the instruction is already in.  A name repeated within the value is
    reported but does not fail the tag.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Lines and sections are both comma joined, giving one flat name list.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asNames = sJoined.split(',');
    if not asNames:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));

    asUnknown = [sName for sName in asNames if sName not in g_dInstructionMaps];
    if asUnknown:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, asUnknown[0], ', '.join(g_dInstructionMaps.keys()),));

    # Maps already assigned by an earlier tag are a hard error.
    for oAssigned in oInstr.aoMaps:
        if oAssigned.sName in asNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oAssigned.sName));

    # Add the new maps; repeats within this value are only reported.
    for sName in asNames:
        oNewMap = g_dInstructionMaps[sName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
2081
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    Exactly one value is expected.  'n/a' stores None.  A two character
    value gets '0x' prepended before being validated as an opcode byte,
    and anything but 'n/a' must be a key of g_kdPrefixes.  A prefix which
    is already set is never overwritten.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Report an empty value rather than failing on asPrefixes[0] below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2119
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg   (TODO: mr/reg variants)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted is either a value passing _isValidOpcodeByte, or one of the
    '/', '11/' and '!11/' forms followed by a single ModR/M reg digit.
    The reg field is three bits wide, so the digit must be 0 thru 7.  An
    opcode which is already set is never overwritten.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sRegPrefix in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sRegPrefix) + 1 \
               and sOpcode.startswith(sRegPrefix) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2149
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; the first element of the
    matching entry is what gets stored.  A sub opcode which is already
    set is never overwritten.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Store it unless there is one already.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
2175
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  An encoding which
    is already set is never overwritten.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown = sNewEncoding in g_kdEncodings \
          or sNewEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Store it unless there is one already.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;
    return True;
2202
## Maps each EFLAGS tag to the name of the Instruction attribute it sets.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};

def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or the
    single word 'none' for an empty list.  Surrounding whitespace on an
    entry is stripped when that makes it valid.  The list is stored in
    the instruction attribute given by kdOpFlagToAttr for the tag, which
    must still be None.

    Returns True on success, False if any flag is invalid, otherwise the
    result of self.errorComment for an overwrite attempt.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Lines and sections are both comma joined, giving one flat flag list.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # Report every invalid flag before giving up.
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Store the list unless the attribute has been set already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2243
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are recorded as keys of the instruction's
    dHints dictionary.  A hint which is already present is reported but
    does not fail the tag.

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # The words come from a whitespace split(), so they carry no
        # surrounding whitespace and need no strip() based second lookup.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2277
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the
    instruction.

    The value must match self.oReDisEnum.  When more than one word is
    given this is reported and the first word is used.  An enum which is
    already set is never overwritten.

    Returns True on success, False when there is no value, otherwise the
    result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split; anything but exactly one word is reported.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if not asWords:
        return False;

    sNewDisEnum = asWords[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Store it unless there is one already.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));
    oInstr.sDisEnum = sNewDisEnum;
    return True;
2306
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key of g_kdCpuNames.  When more than one word is
    given this is reported and the first word is used.  A different
    minimum CPU which is already set is kept, and the conflict is
    reported without failing the tag.

    Returns True on success, otherwise the result of self.errorComment
    (missing or invalid value).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Report an empty value rather than failing on asCpus[0] below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The attribute is deliberately left untouched until here, so
    # that a conflicting earlier value is still visible to the check.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2336
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys,
    or the single word 'none' for an empty list.  Valid flags are appended
    to the instruction's asCpuIds list.  A flag which is already present
    is reported but does not fail the tag.

    Returns True on success, False if any flag is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # The words come from a whitespace split(), so they carry no
        # surrounding whitespace and need no strip() based second lookup.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2370
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is expected.  A
    group which is already set is never overwritten.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));

    sNewGroup = asWords[0];
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Store it unless there is one already.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oInstr.sGroup = sNewGroup;
    return True;
2396
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, a key of g_kdInvalidStyles, is expected, and only
    one of these tags may be given per instruction.  Besides storing the
    style, \@opunused sets fUnused and \@opinvalid sets fInvalid.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));

    sNewStyle = asWords[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Store it unless one of the three tags has been seen already.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction itself.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
2434
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value describes one test.  A section is split into
    words which are sorted into selectors, inputs and outputs using the
    '/' and '->' separator words.  Tests that parse cleanly are appended
    to the instruction's aoTests list; problems are reported through
    self.errorComment and the offending test is dropped.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are walked back to front: first the outputs, then after
        # '->' the inputs, and after '/' the selectors.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  These compare by identity as the
            # lists compare equal by value for as long as they are empty.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicate shorthands are expanded before parsing.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition counts.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # An explicit ':type' suffix overrides the default
                        # type of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    # Only the first operator found in the item counts.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Keep the test if everything parsed, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2579
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is expected to equal the count of tests the instruction
    already has; a mismatch is reported but the test is parsed anyway.

    Returns what parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows the 7 character '@optest' part, optionally in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2595
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    A simple trick for ignoring a test while debugging another one: the
    tag and its value are accepted and discarded.

    See also \@oponlytest.

    Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2607
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName
    and is appended to the instruction's asCopyTests list.  Invalid and
    duplicate references are reported and skipped without failing the tag.

    Returns True, or the result of self.errorComment when there is no
    value at all.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Only the names are recorded here.  The copying itself is executed
    # after parsing all the input, so forward references can be handled.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2636
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is added to the
    g_aoOnlyTestInstructions list unless it is already in there.

    See also \@optestignore.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Any value at all is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction, once.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2659
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is expected and it must be a key of g_kdXcptTypes.
    The type can only be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split()
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,))
    sType = asTypes[0]
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),))

    # Set it, refusing to silently replace an earlier @opxcpttype.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,))
    oInstr.sXcptType = sType

    _ = iEndLine
    return True
2686
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or is
    generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    The value must match self.oReFunctionName and the instruction must not
    already have a function name.  Returns True on success, otherwise the
    errorComment() result.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sName = self.flattenAllSections(aasSections)
    if not self.oReFunctionName.match(sName):
        sMsg = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sName, self.oReFunctionName.pattern)
        return self.errorComment(iTagLine, sMsg)

    # Never replace a name that has already been established.
    if oInstr.sFunction is not None:
        sMsg = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sName,)
        return self.errorComment(iTagLine, sMsg)

    oInstr.sFunction = sName
    return True
2714
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or is generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    The value must match self.oReStatsName and the instruction must not
    already have a statistics name.  Returns True on success, otherwise the
    errorComment() result.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sName = self.flattenAllSections(aasSections)
    if not self.oReStatsName.match(sName):
        sMsg = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sName, self.oReStatsName.pattern)
        return self.errorComment(iTagLine, sMsg)

    # Never replace a name that has already been established.
    if oInstr.sStats is not None:
        sMsg = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sName,)
        return self.errorComment(iTagLine, sMsg)

    oInstr.sStats = sName
    return True
2742
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result, or the errorComment() result if
    the tag is followed by a value.
    """
    _ = iEndLine
    sValue = self.flattenAllSections(aasSections)
    if sValue == '':
        return self.doneInstructions()
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,))
2755
2756 ## @}
2757
2758
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected right away
    (returns False).  Otherwise the text is split into lines, stripped of
    leading asterisks/blanks and of empty lines at either end, and then:
        - if the first line starts with 'Opcode ' (old style, e.g.
          'Opcode 0x0f 0x12') the lines are handed to parseCommentOldOpcode();
        - every line starting with '@' opens a new tag.  The text of a tag
          is collected as a list of sections (paragraphs separated by empty
          lines) and passed to the handler registered for the tag in
          self.dTagHandlers when the next tag or the end is reached.

    Unknown '@op*' tags and likely misspellings are reported through
    errorComment(), as is untagged text in a comment which also contains
    handled tags.

    Returns True when the comment was examined.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sText = self.sComment
    if 'Opcode' not in sText and '@' not in sText:
        return False

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines; each leading line that
    # is dropped bumps the line number the comment text starts at.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sText.split('\n')]

    while asLines and not asLines[0]:
        self.iCommentLine += 1
        del asLines[0]

    while asLines and not asLines[-1]:
        del asLines[-1]

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines)

    #
    # Look for @op* tagged data.  Text before the first tag is gathered
    # under the pseudo tag '@default'.
    #
    cOpTags = 0
    sFlatDefault = None
    sCurTag = '@default'
    iCurTagLine = 0
    asCurSection = []
    aasSections = [asCurSection]
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # Process the previous tag.
            #
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1)     # Drop the empty section opened by a trailing blank line.
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine)
                cOpTags += 1
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag))
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections)
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag))
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,))

            #
            # New tag: the first word is the (case insensitive) tag name,
            # anything after it is the first line of its first section.
            #
            asSplit = sLine.split(None, 1)
            sCurTag = asSplit[0].lower()
            asCurSection = asSplit[1:]
            aasSections = [asCurSection]
            iCurTagLine = iLine
        elif sLine:
            asCurSection.append(sLine)
        elif asCurSection:
            # Empty line after some text: start a new section.
            asCurSection = []
            aasSections.append(asCurSection)

    #
    # Process the final tag (no misspelling hints here).
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1)
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine)
        cOpTags += 1
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag))
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections)

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,))

    return True
2859
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name followed by an opening
    parenthesis.  If the argument list is not closed within sInvocation,
    further lines are appended from self.asLines, starting at self.iLine
    (self.iLine itself is left untouched).

    Arguments are separated by commas at the outermost parenthesis level;
    nested parentheses are kept as part of the argument.  Each argument is
    stripped of surrounding whitespace.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    A missing opening parenthesis is reported via self.raiseError().  An
    invalid macro name or an invocation running past the end of the file
    yields the self.error() result rather than a tuple.
    NOTE(review): callers unpack the result as a tuple - confirm what
    self.error() returns.
    """
    # First the name.
    offOpen = sInvocation.find('(')
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found")
    sName = sInvocation[:offOpen].strip()
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,))
    asRet = [sName, ]

    # Arguments.
    iLine = self.iLine
    cDepth = 1
    off = offOpen + 1
    offStart = off
    while cDepth > 0:
        # Out of text: pull in following lines until there is a character
        # at 'off'.  This must loop since an appended line may be empty.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file')
            sInvocation += self.asLines[iLine]
            iLine += 1
        ch = sInvocation[off]

        if ch == ',' or ch == ')':
            if cDepth == 1:
                # End of an argument at the outermost level.
                asRet.append(sInvocation[offStart:off].strip())
                offStart = off + 1
            if ch == ')':
                cDepth -= 1
        elif ch == '(':
            cDepth += 1
        off += 1

    return (off, asRet)
2901
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if it is
    followed (after optional whitespace) by an opening parenthesis, parses
    the invocation with parseMacroInvocation().

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro)
    # Use startswith() rather than indexing: the macro name may be the last
    # thing in sCode, leaving nothing to index.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet)
    return (len(sCode), None)
2911
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that drops
    the offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    aoResult = self.findAndParseMacroInvocationEx(sCode, sMacro)
    asArgs = aoResult[1]
    return asArgs
2917
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one for which an invocation is found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate)
        if asArgs is None:
            continue
        return asArgs
    return None
2927
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are sanity checked and then merged into the last
    instruction of self.aoCurInstrs (one is added first if the list is
    empty): mnemonic, operands, encoding, statistics name and hints.  Where
    the instruction already carries a value (from @op* tags), the macro
    value is compared against it instead.  All problems are reported via
    self.error() and processing carries on.

    Nothing is merged if sIemHints contains IEMOPHINT_SKIP_PYTHON.

    sAsm is currently unused.  Returns True.
    """
    _ = sAsm

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,))
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,))
    if sLower != sUpper.lower():
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,))
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,))

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction()
    oInstr = self.aoCurInstrs[-1]
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,))
    else:
        oInstr.iLineMnemonicMacro = self.iLine

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,))

    # Process operands.
    aoOperands = oInstr.aoOperands
    if len(aoOperands) != 0 and len(aoOperands) != len(asOperands):
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(aoOperands), len(asOperands),))
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1]
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType))
            # Error recovery: fall back on the @opN value or a dummy operand.
            if iOperand < len(aoOperands):
                oKnown = aoOperands[iOperand]
                sWhere, sType = oKnown.sWhere, oKnown.sType
            else:
                sWhere, sType = 'reg', 'Gb'
        if iOperand == len(aoOperands):
            aoOperands.append(Operand(sWhere, sType))
        else:
            oCur = aoOperands[iOperand]
            if oCur.sWhere != sWhere or oCur.sType != sType:
                self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                           % (sMacro, iOperand + 1, iOperand + 1, oCur.sWhere, oCur.sType, sWhere, sType,))

    # Encoding.
    if sForm in g_kdIemForms:
        aoForm = g_kdIemForms[sForm]
        if oInstr.sEncoding is None:
            oInstr.sEncoding = aoForm[0]
        elif aoForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, aoForm, sForm))

        # Check the parameter locations for the encoding.
        asWheres = aoForm[1]
        if asWheres is not None:
            if len(asWheres) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asWheres), len(oInstr.aoOperands) ))
            else:
                for iOperand, sWhere in enumerate(asWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,))
    else:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,))

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,))
    elif oInstr.sStats is None:
        oInstr.sStats = sStats
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,))

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler hints are handled first, then the IEM ones.
    for sHints, sPrefix, sFmtUnknown, sFmtExpected in (
            (sDisHints, 'DISOPTYPE_', '%s: unknown a_fDisHints value: %s', '%s: expected a_fDisHints value: %s'),
            (sIemHints, 'IEMOPHINT_', '%s: unknown a_fIemHints value: %s', '%s: expected a_fIemHints value: %s'),
    ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip()
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower()
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True    # (dummy value, using dictionary for speed)
                else:
                    self.error(sFmtUnknown % (sMacro, sHint,))
            elif sHint != '0':
                self.error(sFmtExpected % (sMacro, sHint,))

    return True
3045
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx(), whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands)
        sAsm = sLower + ' ' + ','.join(asOperands)
    else:
        sStats = sLower
        sAsm = sLower
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands)
3055
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Two kinds of macros are looked for in sCode:
        - FNIEMOP_DEF / FNIEMOP_STUB* / FNIEMOP_UD_STUB* decoder function
          definitions, which set the function name and stub flags of the
          current instructions (stubs also complete the instructions);
        - IEMOP_MNEMONIC and the IEMOP_MNEMONIC<N>[EX] family, which supply
          statistics name, mnemonic, operands and so on.

    Returns True if a decoder function definition was found, False
    otherwise (including when sCode has no '(' beyond the first column).
    """
    #
    # Scan macro invocations.  Nothing to do without an opening parenthesis.
    #
    if sCode.find('(') <= 0:
        return False

    # Look for instruction decoder function definitions. ASSUME single line.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ])
    if asArgs is not None:
        sMacro = asArgs[0]
        sFunction = asArgs[1]

        if not self.aoCurInstrs:
            self.addInstruction()
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) )

        fStub = sMacro.find('STUB') > 0
        self.setInstrunctionAttrib('sFunction', sFunction)
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True)
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True)
        if fStub:
            self.doneInstructions()
        return True

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC')
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0]
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1]
        # NOTE(review): nesting reconstructed from a listing without
        # indentation; the derive call is assumed to run whether or not
        # sStats was already set - confirm against the repository.
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1])

    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<N>,]
    #                     a_fDisHints, a_fIemHints)   for N = 0..4
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,))
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[6 + cOperands], asArgs[7 + cOperands],
                                       asArgs[6 : 6 + cOperands])

    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<N>,] a_fDisHints, a_fIemHints)
    #                   for N = 0..4
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,))
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[4 + cOperands], asArgs[5 + cOperands],
                                     asArgs[4 : 4 + cOperands])

    return False
3149
3150
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, tracking whether we are inside
    a /* multi-line comment */ (self.iState).  Code outside comments is
    passed to checkCodeForMacro(), completed comments are collected in
    self.sComment and passed to parseComment(), and a '}' in the first
    column completes the current instructions.

    Returns the printErrors() result (number of errors).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine]
        self.iLine += 1

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/')
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                # Not a C++ line comment in code: scan the line piece by
                # piece, switching state at each comment start/end.
                offLine = 0
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine)  # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit])
                            self.sComment = ''
                            self.iCommentLine = self.iLine
                            self.iState = self.kiCommentMulti
                            offLine = offHit + 2
                        else:
                            self.checkCodeForMacro(sLine[offLine:])
                            offLine = len(sLine)

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine)
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit]
                            self.iState = self.kiCode
                            offLine = offHit + 2
                            self.parseComment()
                        else:
                            self.sComment += sLine[offLine:]
                            offLine = len(sLine)
                    else:
                        assert False
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash])

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine)

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith() rather than sLine[0] so an empty line entry is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions()

    self.doneInstructions()
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),))
    return self.printErrors()
3217
3218
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    The file is read into a list of lines which is then fed to a
    SimpleParser instance together with sSrcFile and sDefaultMap.

    Returns the SimpleParser.parse() result (error count).
    Raises Exception if the file cannot be opened or read.  Exceptions from
    the parser are passed on; a ParserException is printed first.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r")
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,))
    with oFile:     # Closes the file whether or not reading succeeds.
        try:
            asLines = oFile.readlines()
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,))

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse()
    except ParserException as oXcpt:
        print(str(oXcpt))
        raise
3249
3250
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    For every instruction in g_aoAllInstructions, each reference in its
    asCopyTests list is looked up, first as a statistics name and then as a
    function name, and the tests of the instruction(s) found are appended to
    the instruction's own aoTests.  References that cannot be resolved, or
    that resolve to the instruction itself, are errors.

    The error messages are written to stderr.  Returns the error count.
    """
    asErrors = []
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue
        for sSrcInstr in oDstInstr.asCopyTests:
            # A stats name identifies one instruction, a function name
            # maps to a list of them.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None)
            if oByStat:
                aoSrcInstrs = [oByStat,]
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, [])

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))
                continue

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests)
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))

    if asErrors:
        sys.stderr.write(u''.join(asErrors))
    return len(asErrors)
3278
3279
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0, so the result can be added to an error count.
    """
    if g_aoOnlyTestInstructions:
        for oInstr in g_aoAllInstructions:
            if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
                oInstr.aoTests = []
    return 0
3291
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the directory of this script.  Once they are
    all parsed, the test copying and the only-test filtering are applied.

    Returns True on success.  Terminates via sys.exit(1) if any errors were
    counted; exceptions raised while parsing are passed on.
    """
    # (default map, file name) pairs, processed in this order.
    aoSources = [
        ( 'one', 'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1', 'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2', 'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3', 'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow', 'IEMAllInstructions3DNow.cpp.h'),
    ]
    sSrcDir = os.path.dirname(os.path.abspath(__file__))

    cErrors = 0
    for sDefaultMap, sName in aoSources:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap)
    cErrors += __doTestCopying()
    cErrors += __applyOnlyTest()

    if cErrors != 0:
        sys.exit(1)
    return True
3318
3319
3320
# Run the parser over all the instruction files as soon as this module is
# loaded, whether it is imported or executed as a script.
__parseAll()
3322
3323
3324#
3325# Generators (may perhaps move later).
3326#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    For an instruction map from g_dInstructionMaps this writes a C array
    ('const DISOPCODE <table>[] = { ... };') followed by an AssertCompile on
    the table size to oDstFile, with one OP()/OPVEX() entry per instruction
    laid out in fixed columns.  The maps are sorted by encoding and lead
    opcodes, but for now only the first one is emitted.

    oDstFile is any object with a write() method (default: sys.stdout).
    """
    # Target column offsets for the pieces making up an entry.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199]

    aoSortedMaps = sorted(g_dInstructionMaps.items(),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes))
    for sName, oMap in aoSortedMaps:
        assert oMap.sName == sName
        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ]

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # Start every group of 16 entries with a comment giving the high nibble.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('')
                asLines.append(' /* %x */' % (iInstr >> 4,))

            if oInstr is None:
                continue    # Nothing is emitted for empty slots.
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr))
                continue

            aoOperands = oInstr.aoOperands
            if len(aoOperands) > 3:
                sMacro = 'OPVEX'
                cMaxOperands = 4
            else:
                sMacro = 'OP'
                cMaxOperands = 3
            assert len(aoOperands) <= cMaxOperands

            #
            # Format string.
            #
            sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,)
            for iOperand, oOperand in enumerate(aoOperands):
                sTmp += ' ' if iOperand == 0 else ','
                sOpFmt = g_kdOpTypes[oOperand.sType][2]
                if sOpFmt[0] != '%':                ## @todo remove upper() later.
                    sOpFmt = sOpFmt.upper()         ## @todo remove upper() later.
                sTmp += sOpFmt
            sTmp += '",'
            asColumns = [ sTmp, ]

            #
            # Decoders.
            #
            iStart = len(asColumns)
            sEncoding = oInstr.sEncoding
            if sEncoding is None:
                pass
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(aoOperands) >= 1 and aoOperands[0].usesModRM()
                asColumns.append('IDX_ParseModRM,')
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(aoOperands) > 1 and aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif sEncoding in [ 'prefix', ]:
                asColumns.extend(['0,'] * len(aoOperands))
            elif sEncoding in [ 'fixed' ]:
                pass
            elif sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,')
            elif sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,')
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',')
            else:
                ## @todo IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3 .. IDX_ParseGrp10,
                #        IDX_ParseGrp12 .. IDX_ParseGrp16, IDX_Parse3DNow, IDX_ParseThreeByteEsc4,
                #        IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause,
                #        IDX_ParseInvOpModRM,
                assert False, str(oInstr)

            # Check for immediates and stuff in the remaining operands.
            # (The slice is taken once, before any columns are added here.)
            for oOperand in aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0]
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',')
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)))

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr)
            asColumns.append(oInstr.sDisEnum + ',')
            iStart = len(asColumns)
            for oOperand in aoOperands:
                asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',')
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)))

            #
            # Flags: the DISOPTYPE_ defines of the hints, or'ed together.
            #
            asFlags = []
            for sHint in sorted(oInstr.dHints.keys()):
                sDefine = g_kdHints[sHint]
                if sDefine.startswith('DISOPTYPE_'):
                    asFlags.append(sDefine)
            asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),')

            #
            # Format the columns into a line: pad up to the column offset,
            # or use a single space if the line is already at or past it.
            #
            sLine = ''
            for iColumn, sColumn in enumerate(asColumns):
                offColumn = aoffColumns[iColumn]
                if len(sLine) < offColumn:
                    sLine = sLine.ljust(offColumn)
                else:
                    sLine += ' '
                sLine += sColumn

            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            # DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

            asLines.append(sLine)

        asLines.append('};')
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),))

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines))
        oDstFile.write('\n')
        break  #for now
3490
# When executed as a script, print the disassembler tables to stdout.
if __name__ == '__main__':
    generateDisassemblerTables()
3493
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette