VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 67012

Last change on this file since 67012 was 67012, checked in by vboxsync, 8 years ago

IEM: Implemented vmovd Ed,Vd and vmovq Eq,Vq (VEX.66.0F 7e).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 146.4 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 67012 2017-05-22 12:26:25Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 67012 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 compatibility: there is no separate 'long' type any more, so alias
# it to 'int' for the code below that still calls long().
if sys.version_info >= (3,):
    long = int  # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS constant names and their mask values.
## Single-bit flags are written as shifts (bit number == RT_BIT_32 argument).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,    # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
}
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to an X86_EFL_XXX constant name (a key of
## g_kdX86EFlagsConstants).  A leading '!' on the constant name marks the
## mnemonic as meaning "flag is clear" rather than "flag is set".
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): in the usual debugger notation 'pe' (parity even) means PF
    # is set and 'po' (parity odd) means PF is clear; the mapping below looks
    # reversed - confirm against the users of this table before changing it.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
124
## CR0 constant names and their mask values (shift count == bit number).
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
}
139
## CR4 constant names and their mask values (shift count == bit number).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
}
161
## XSAVE components (XCR0): constant names and their mask values.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    # Combined masks: YMM | ZMM_HI256 | ZMM_16HI, optionally with SSE.
    'XSAVE_C_ALL_AVX':     (1 << 2) | (1 << 6) | (1 << 7),              # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': (1 << 1) | (1 << 2) | (1 << 6) | (1 << 7),   # For clearing all AVX and SSE bits.
}
178
179
## \@op[1-4] locations.
## Every location name maps to its own (initially empty) list.
g_kdOpLocations = dict((sLocation, []) for sLocation in (
    'reg',      # modrm.reg
    'rm',       # modrm.rm
    'imm',      # immediate instruction data
    'vvvv',     # VEX.vvvv

    # Fixed registers.
    'AL',
    'rAX',
    'rSI',
    'rDI',
    'rFLAGS',
    'CS',
    'DS',
    'ES',
    'FS',
    'GS',
    'SS',
))
200
## \@op[1-4] types
##
## Maps an operand type name to a 4-tuple.
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Several names carry an _RO/_WO/_RW or Zx/Hi suffix; in this table such
## variants mostly share the format string and OP_PARAM value of the
## unsuffixed type.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', ),

    # VEX.vvvv
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),

    # Flags register.
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
};
330
331# IDX_ParseFixedReg
332# IDX_ParseVexDest
333
334
## IEMFORM_XXX mappings.
##
## Maps the form name (without the IEMFORM_ prefix) to a tuple holding the
## encoding name (one of the g_kdEncodings keys used here) and the list of
## operand locations in operand order.  The 'FIXED' form has None instead
## of a location list.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'M': ( 'ModR/M', [ 'rm', ], ),
    'M_REG': ( 'ModR/M', [ 'rm', ], ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], ),
    'R': ( 'ModR/M', [ 'reg', ], ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED': ( 'fixed', None, )
};
368
## \@oppfx values.
## Every prefix name maps to its own (initially empty) list.
g_kdPrefixes = dict((sPrefix, []) for sPrefix in ('none', '0x66', '0xf3', '0xf2'))
376
## Special \@opcode tag values.
## Every tag value maps to its own (initially empty) list.
g_kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
))
386
## Special \@opcodesub tag values.
##
## Maps each accepted tag value to a one-element list holding the canonical
## value; the short forms marked 'alias' map to the same canonical value as
## the long form above them.  'none' maps to [ None ].
g_kdSubOpcodes = {
    'none': [ None, ],
    '11 mr/reg': [ '11 mr/reg', ],
    '11': [ '11 mr/reg', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', ],
    '!11': [ '!11 mr/reg', ], ##< alias
    'rex.w=0': [ 'rex.w=0', ],
    'w=0': [ 'rex.w=0', ], ##< alias
    'rex.w=1': [ 'rex.w=1', ],
    'w=1': [ 'rex.w=1', ], ##< alias
};
399
## Valid values for \@openc
##
## Maps the encoding name to a one-element list holding the matching
## BS3CG1ENC_XXX constant name, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix': [ None, ], ##< Prefix
};
408
## \@opunused, \@opinvalid, \@opinvlstyle
## Every style name maps to its own (initially empty) list.
g_kdInvalidStyles = dict((sStyle, []) for sStyle in (
    'immediate',                ##< CPU stops decoding immediately after the opcode.
    'vex.modrm',                ##< VEX+ModR/M, everyone.
    'intel-modrm',              ##< Intel decodes ModR/M.
    'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
))
418
## Known CPU names; every name maps to an empty tuple.
g_kdCpuNames = dict((sCpu, ()) for sCpu in ('8086', '80186', '80286', '80386', '80486'))
426
## \@opcpuid
##
## Maps the lower case CPUID feature name to the X86_CPUID_XXX constant name
## for that feature bit.
##
## Note! 'pclmul', 'monitor' and 'smx' used to map to the constants of the
##       neighbouring ECX bits (DTES64, CPLDS and TM2); they now name their
##       own feature bits.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul': 'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor': 'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse41': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD defined features.
    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
471
## \@ophints values.
##
## Maps the hint name to the matching DISOPTYPE_XXX constant name.  The hints
## at the end of the table map to an empty string instead.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
    ## (only in 16 & 32 bits mode!)
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (presumably; the old text named itself).
    'ignores_vex_l': '', ##< Ignores VEX.L.
    'vex_l_zero': '', ##< VEX.L must be 0.
    'lock_allowed': '', ##< Lock prefix allowed.
};
510
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every type name maps to its own (initially empty) list.
g_kdXcptTypes = dict((sXcptType, []) for sXcptType in (
    'none',
    '1',
    '2',
    '3',
    '4',
    '4UA',
    '5',
    '5LZ',      # LZ = VEX.L must be zero.
    '6',
    '7',
    '7LZ',
    '8',
    '11',
    '12',
    'E1',
    'E1NF',
    'E2',
    'E3',
    'E3NF',
    'E4',
    'E4NF',
    'E5',
    'E5NF',
    'E6',
    'E6NF',
    'E7NF',
    'E9',
    'E9NF',
    'E10',
    'E11',
    'E12',
    'E12NF',
))
546
547
548def _isValidOpcodeByte(sOpcode):
549 """
550 Checks if sOpcode is a valid lower case opcode byte.
551 Returns true/false.
552 """
553 if len(sOpcode) == 4:
554 if sOpcode[:2] == '0x':
555 if sOpcode[2] in '0123456789abcdef':
556 if sOpcode[3] in '0123456789abcdef':
557 return True;
558 return False;
559
560
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): the comments say 'vvvvv'; presumably the VEX/XOP map
    ## select field is meant - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    }
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':         [ 256, ],   ##< next opcode byte selects the instruction (default).
        '/r':           [   8, ],   ##< modrm.reg selects the instruction.
        'memreg /r':    [  16, ],   ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':       [  32, ],   ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':       [   8, ],   ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':        [   8, ],   ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':           [  64, ],   ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    }

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sSelector must be a kdSelectors key, sEncoding a kdEncodings key,
        every asLeadOpcodes entry a lower case '0xNN' string and sDisParse
        either None or a string starting with 'IDX_Parse' (all asserted).
        """
        assert sSelector in self.kdSelectors
        assert sEncoding in self.kdEncodings
        if asLeadOpcodes is None:
            asLeadOpcodes = []
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode)
        assert sDisParse is None or sDisParse.startswith('IDX_Parse')

        self.sName          = sName
        self.asLeadOpcodes  = asLeadOpcodes     ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector         ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding         ##< The encoding, see kdEncodings.
        self.aoInstructions = []                ##< The member instructions (list of Instruction objects).
        self.sDisParse      = sDisParse         ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0]

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        oInstr must provide getOpcodeByte() and, for the 'byte' selector, an
        sOpcode string of the form '0xNN'.  The opcode byte is treated as a
        ModR/M byte for all the other selectors (mod = bits 7:6,
        reg = bits 5:3, rm = bits 2:0).
        """
        bOpcode = oInstr.getOpcodeByte()

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr)
            return bOpcode

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3)

        # The mod field occupies bits 7:6, so mod == 0y11 means both bits of
        # the 0xc0 mask are set.  (These checks used to compare against 0xc,
        # a value the masked byte can never have.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr)
            return (bOpcode >> 3) & 0x7

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr)
            return (bOpcode >> 3) & 0x7

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr)
            return bOpcode & 0x3f

        assert False, self.sSelector
        return -1

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize()
        aoTable = [None] * cTable

        # Insert the instructions; those without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr)
                assert idxOpcode < cTable, str(idxOpcode)

                oExisting = aoTable[idxOpcode]
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr]
                else:
                    oExisting.append(oInstr)

        return aoTable

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two-digit lower case hex words are appended with a
        leading underscore, all other words are appended capitalized.
        """
        sName = 'g_aDisas'
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m'
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r'
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord
            else:
                sWord  = sWord.replace('grp', 'Grp')
                sWord  = sWord.replace('map', 'Map')
                sName += sWord[0].upper() + sWord[1:]
        return sName

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex')
706
707
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes  # Normal sizes (bytes).
        self.fUnsigned = fUnsigned

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage)
            self.sMessage = sMessage

    ## For ascii ~ operator (maps each hex digit to its 4-bit complement).
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    }

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number string,
        optionally preceded by '+' or '-'.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        class only ever returns the single entry form.  The bytearray holds
        the value in little endian byte order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value')

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x'
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x'

        # Try convert it to an integer.  int() returns arbitrary precision
        # values on both Python 2 and 3, so no 'long' alias is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10)
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt))

        # Convert to a hex digit string.  Negative values are produced by
        # inverting the digits of (-n - 1), i.e. the two's complement.
        # Python 2 appends 'L' for long values, strip it if present.
        sHex = hex(iValue if iValue >= 0 else -iValue - 1).rstrip('L')
        assert sHex[:2] == '0x'
        sHex = sHex[2:]
        if iValue < 0:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex])
            # Make sure the top bit is set so the value reads as negative.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex

        # Pick the smallest normal size that fits, or round up to a multiple
        # of the largest normal size when the value is bigger than that.
        cDigits    = len(sHex)
        cMaxDigits = self.acbSizes[-1] * 2
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2
                if cDigits <= cNaturalDigits:
                    break
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits

        # Pad with zeros or, for negative values, with ones.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits
            sHex = ('0' if iValue >= 0 else 'f') * cNeeded + sHex

        # Walk the digit pairs from the end to get little endian byte order.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)])

        return ((fSignExtend, abValue),)

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue)
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage
        return True

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for this base class.
        """
        _ = sValue
        return False
832
833
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that mean "flag is clear" (mapped to '!X86_EFL_XXX').
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)

    def get(self, sValue):
        """
        Converts a comma separated flag mnemonic list into value entries.

        Returns a single entry tuple with the flags to set when no "clear"
        mnemonic is present, otherwise a (clear-mask, set-mask) pair of
        entries (see TestType.get for the entry format).

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0
        fSet   = 0
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None)
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]]
            else:
                fSet |= g_kdX86EFlagsConstants[sConstant]

        aoSet = TestType.get(self, '0x%x' % (fSet,))
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True
            return (aoClear[0], aoSet[0])
        assert self.isAndOrPair(sValue) is False
        return aoSet

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any "flag is clear" mnemonic.

        Whole comma separated tokens are compared (the same split get()
        uses), so that e.g. 'iopl' is not mistaken for containing 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True
        return False
869
class TestTypeFromDict(TestType):
    """
    Value parsing for flag fields described by a constant dictionary.

    The value is a comma separated list of flag names.  Each name is upper
    cased, prefixed with sConstantPrefix and looked up in
    kdConstantsAndValues; the values found are ORed together.
    """

    ## Not referenced by this class's own methods.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)
        self.kdConstantsAndValues = kdConstantsAndValues
        self.sConstantPrefix      = sConstantPrefix

    def get(self, sValue):
        """
        Returns the TestType.get() result for the ORed flag values.

        Raises BadValue if a flag name is not in the dictionary.
        """
        fCombined = 0
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper()
            fOne = self.kdConstantsAndValues.get(sConstant, None)
            if fOne is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fOne
        sHexValue = '0x%x' % (fCombined,)
        return TestType.get(self, sHexValue)
890
891
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '=',
    ]
    ## Types
    kdTypes = {
        'uint': TestType('uint', fUnsigned = True),
        'int':  TestType('int'),
        'efl':  TestTypeEflags('efl'),
        'cr0':  TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':  TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    }
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Misspelled variant of 'xmm15.dw0' (underscore instead of dot), kept
        # only so that anything already using this name continues to work.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records one modifier.

        sField must be a key of kdFields and sOp an entry of kasOperators;
        all four arguments must be str instances.  These requirements are
        checked with assertions only.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
        assert isinstance(sField, str)
        assert isinstance(sOp, str)
        assert isinstance(sType, str)
        assert isinstance(sValue, str)
1157
1158
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (key of kdVariables) against one of the
    values listed for that variable, using an operator from kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ]
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':   'size_o16',
            'o32':   'size_o32',
            'o64':   'size_o64',
        },
        # VEX.L value.
        'vex.l': {
            '0':     'vexl_0',
            '1':     'vexl_1',
        },
        # Execution ring.
        'ring': {
            '0':     'ring_0',
            '1':     'ring_1',
            '2':     'ring_2',
            '3':     'ring_3',
            '0..2':  'ring_0_thru_2',
            '1..3':  'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':    'code_64bit',
            '32':    'code_32bit',
            '16':    'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real':  'mode_real',
            'prot':  'mode_prot',
            'long':  'mode_long',
            'v86':   'mode_v86',
            'smm':   'mode_smm',
            'vmx':   'mode_vmx',
            'svm':   'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':    'paging_on',
            'off':   'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    }
    ## Selector shorthand predicates.
    ## Each one translates into a variable expression.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        Records the selector.  The arguments are checked against
        kdVariables and kasCompareOps with assertions only.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        assert sValue in self.kdVariables[sVariable]
        self.sVariable = sVariable
        self.sOp       = sOp
        self.sValue    = sValue
1255
1256
1257class InstructionTest(object):
1258 """
1259 Instruction test.
1260 """
1261
1262 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1263 self.oInstr = oInstr; # type: InstructionTest
1264 self.aoInputs = []; # type: list(TestInOut)
1265 self.aoOutputs = []; # type: list(TestInOut)
1266 self.aoSelectors = []; # type: list(TestSelector)
1267
1268 def toString(self, fRepr = False):
1269 """
1270 Converts it to string representation.
1271 """
1272 asWords = [];
1273 if self.aoSelectors:
1274 for oSelector in self.aoSelectors:
1275 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1276 asWords.append('/');
1277
1278 for oModifier in self.aoInputs:
1279 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1280
1281 asWords.append('->');
1282
1283 for oModifier in self.aoOutputs:
1284 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1285
1286 if fRepr:
1287 return '<' + ' '.join(asWords) + '>';
1288 return ' '.join(asWords);
1289
1290 def __str__(self):
1291 """ Provide string represenation. """
1292 return self.toString(False);
1293
1294 def __repr__(self):
1295 """ Provide unambigious string representation. """
1296 return self.toString(True);
1297
class Operand(object):
    """
    Instruction operand.

    sWhere must be a key of g_kdOpLocations and sType a key of g_kdOpTypes;
    both are checked with assertions only.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere
        assert sType in g_kdOpTypes, sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the operand type starts with 'E', 'G' or 'M' (some form of ModR/M encoding). """
        chFirst = self.sType[0]
        return chFirst in ('E', 'G', 'M')
1312
1313
1314
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds everything collected about one instruction: the specification
    attributes, the implementation names, where it was found in the source
    and the raw intermediate input that still needs processing.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        self.asCopyTests    = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs.  Fields
        whose value is None are left out, and several fields are only listed
        when they are set.  The string is wrapped in '<' and '>' when fRepr
        is True.
        """
        aasFields = []

        aasFields.append(['opcode', self.sOpcode])
        aasFields.append(['mnemonic', self.sMnemonic])
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)])
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])])
        aasFields.append(['encoding', self.sEncoding])
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())])
        aasFields.append(['disenum', self.sDisEnum])
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)])
        aasFields.append(['group', self.sGroup])
        if self.fUnused:
            aasFields.append(['unused', 'True'])
        if self.fInvalid:
            aasFields.append(['invalid', 'True'])
        aasFields.append(['invlstyle', self.sInvalidStyle])
        aasFields.append(['fltest', self.asFlTest])
        aasFields.append(['flmodify', self.asFlModify])
        aasFields.append(['flundef', self.asFlUndefined])
        aasFields.append(['flset', self.asFlSet])
        aasFields.append(['flclear', self.asFlClear])
        aasFields.append(['mincpu', self.sMinCpu])
        aasFields.append(['stats', self.sStats])
        aasFields.append(['sFunction', self.sFunction])
        if self.fStub:
            aasFields.append(['fStub', 'True'])
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True'])
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)])
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)])
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)])

        # Identity test against None so that only unset fields are skipped.
        asPairs = ['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]
        sRet = '; '.join(asPairs)
        if fRepr:
            return '<' + sRet + '>'
        return sRet

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms: '0x??' (hex byte), '/r', '11/r' and '!11/r' where r
        is a single digit.  The digit is shifted into bits 3 thru 5; the
        '11/r' form additionally sets 0xc0 and the '!11/r' form 0x80.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sOpcode = str(self.sOpcode)     # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16)

        # The /r form (two characters: the slash and the digit):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics and
        g_kdX86EFlagsConstants.  None and empty lists yield zero.
        """
        uRet = 0
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag]
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant]
        return uRet

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear)

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False
        return True
1497
1498
1499
## List of all the instructions.
g_aoAllInstructions          = []   # type: list(Instruction)

## Instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat     = {}   # type: dict(Instruction)

## Instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}   # type: dict(list(Instruction))

## Instructions tagged by oponlytest.
g_aoOnlyTestInstructions     = []   # type: list(Instruction)
1511
## Instruction maps, indexed by map name.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # NOTE(review): unlike the other grp1_* maps this one passes no sSelector - confirm whether '/r' was intended.
    'grp1_80':      InstructionMap('grp1_80',    asLeadOpcodes = ['0x80']),
    'grp1_81':      InstructionMap('grp1_81',    asLeadOpcodes = ['0x81'], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',    asLeadOpcodes = ['0x82'], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',    asLeadOpcodes = ['0x83'], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',      asLeadOpcodes = ['0x8f'], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0'], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1'], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0'], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1'], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2'], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3'], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6'], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7'], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',       asLeadOpcodes = ['0xfe'], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',       asLeadOpcodes = ['0xff'], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6'], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6'], sSelector = '11'),       # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7'], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7'], sSelector = '11'),       # xbegin

    'two0f':        InstructionMap('two0f',      asLeadOpcodes = ['0x0f'], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',       asLeadOpcodes = ['0x0f', '0x00'], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',     asLeadOpcodes = ['0x0f', '0x01'], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',     asLeadOpcodes = ['0x0f', '0x01'], sSelector = '11'),
    'grp8':         InstructionMap('grp8',       asLeadOpcodes = ['0x0f', '0xba'], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',       asLeadOpcodes = ['0x0f', '0xc7'], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',      asLeadOpcodes = ['0x0f', '0xb9'], sSelector = '/r'),  # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',      asLeadOpcodes = ['0x0f', '0x71'], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',      asLeadOpcodes = ['0x0f', '0x72'], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',      asLeadOpcodes = ['0x0f', '0x73'], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',      asLeadOpcodes = ['0x0f', '0xae'], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',      asLeadOpcodes = ['0x0f', '0x18'], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',     asLeadOpcodes = ['0x0f', '0x78'], sSelector = '/r'),  # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',       asLeadOpcodes = ['0x0f', '0x0d'], sSelector = '/r'),  # AMD: prefetch

    'three0f38':    InstructionMap('three0f38',  asLeadOpcodes = ['0x0f', '0x38']),
    'three0f3a':    InstructionMap('three0f3a',  asLeadOpcodes = ['0x0f', '0x3a']),

    'vexmap1':      InstructionMap('vexmap1',    sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',   sEncoding = 'vex1', asLeadOpcodes = ['0x71'], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',   sEncoding = 'vex1', asLeadOpcodes = ['0x72'], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',   sEncoding = 'vex1', asLeadOpcodes = ['0x73'], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',   sEncoding = 'vex1', asLeadOpcodes = ['0xae'], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',   sEncoding = 'vex1', asLeadOpcodes = ['0xf3'], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',    sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',    sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f']),
    'xopmap8':      InstructionMap('xopmap8',    sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',    sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',   sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
}
1572
1573
1574
class ParserException(Exception):
    """ Parser exception carrying a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
1579
1580
1581class SimpleParser(object):
1582 """
1583 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1584 """
1585
1586 ## @name Parser state.
1587 ## @{
1588 kiCode = 0;
1589 kiCommentMulti = 1;
1590 ## @}
1591
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state, the validation regexps and the tag handler
    table.

    sDefaultMap must be a key of g_dInstructionMaps (asserted); the
    corresponding map is stored as oDefaultMap.
    """
    # Input and parser state.
    self.sSrcFile       = sSrcFile
    self.asLines        = asLines
    self.iLine          = 0
    self.iState         = self.kiCode
    self.sComment       = ''
    self.iCommentLine   = 0
    self.aoCurInstrs    = []

    assert sDefaultMap in g_dInstructionMaps
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap]

    # Counters.
    self.cTotalInstr    = 0
    self.cTotalStubs    = 0
    self.cTotalTagged   = 0

    # Validation expressions.
    sIdentifierPattern  = '^[A-Za-z_][A-Za-z0-9_]*$'
    self.oReMacroName   = re.compile(sIdentifierPattern)
    self.oReMnemonic    = re.compile(sIdentifierPattern)
    self.oReStatsName   = re.compile(sIdentifierPattern)
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$')
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$')
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$')
    self.fDebug         = True

    # Tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':         self.parseTagOpBrief,
        '@opdesc':          self.parseTagOpDesc,
        '@opmnemonic':      self.parseTagOpMnemonic,
        '@op1':             self.parseTagOpOperandN,
        '@op2':             self.parseTagOpOperandN,
        '@op3':             self.parseTagOpOperandN,
        '@op4':             self.parseTagOpOperandN,
        '@oppfx':           self.parseTagOpPfx,
        '@opmaps':          self.parseTagOpMaps,
        '@opcode':          self.parseTagOpcode,
        '@opcodesub':       self.parseTagOpcodeSub,
        '@openc':           self.parseTagOpEnc,
        '@opfltest':        self.parseTagOpEFlags,
        '@opflmodify':      self.parseTagOpEFlags,
        '@opflundef':       self.parseTagOpEFlags,
        '@opflset':         self.parseTagOpEFlags,
        '@opflclear':       self.parseTagOpEFlags,
        '@ophints':         self.parseTagOpHints,
        '@opdisenum':       self.parseTagOpDisEnum,
        '@opmincpu':        self.parseTagOpMinCpu,
        '@opcpuid':         self.parseTagOpCpuId,
        '@opgroup':         self.parseTagOpGroup,
        '@opunused':        self.parseTagOpUnusedInvalid,
        '@opinvalid':       self.parseTagOpUnusedInvalid,
        '@opinvlstyle':     self.parseTagOpUnusedInvalid,
        '@optest':          self.parseTagOpTest,
        '@optestign':       self.parseTagOpTestIgnore,
        '@optestignore':    self.parseTagOpTestIgnore,
        '@opcopytests':     self.parseTagOpCopyTests,
        '@oponly':          self.parseTagOpOnlyTest,
        '@oponlytest':      self.parseTagOpOnlyTest,
        '@opxcpttype':      self.parseTagOpXcptType,
        '@opstats':         self.parseTagOpStats,
        '@opfunction':      self.parseTagOpFunction,
        '@opdone':          self.parseTagOpDone,
    }
    # The numbered test tags, in both the '@optestN' and '@optest[N]' spelling.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum

    self.asErrors = []
1658
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is sMessage prefixed with the
    source file name and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)
    raise ParserException(sFullMessage)
1664
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine   = self.iCommentLine + iLineInComment
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,)
    raise ParserException(sFullMessage)
1670
def error(self, sMessage):
    """
    Queues an error message for the current line (self.iLine) in asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
1678
def errorComment(self, iLineInComment, sMessage):
    """
    Queues an error message in asErrors for a line inside the current
    comment; the reported line is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment
    sEntry     = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
1686
def printErrors(self):
    """
    Writes the queued error messages, if any, to stderr.
    Returns the number of queued errors.
    """
    cErrors = len(self.asErrors)
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors))
    return cErrors
1695
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return
    print('debug: %s' % (sMessage,))
1702
1703
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction and appends it to both g_aoAllInstructions
    and self.aoCurInstrs.

    The instruction is created for line iLine, or for the current line
    (self.iLine) when iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine
    oNewInstr = Instruction(self.sSrcFile, iLine)
    g_aoAllInstructions.append(oNewInstr)
    self.aoCurInstrs.append(oNewInstr)
    return oNewInstr
1712
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Does nothing when oInstr already has a mnemonic.  Otherwise the first
    '_' separated word, lower-cased, becomes the mnemonic.  If oInstr has
    no operands yet, the remaining words are taken as operand types and
    appended as Operand instances.

    Returns False on hitting a word that is not in g_kdOpTypes (operands
    appended up to that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True

    asWords = sStats.split('_')
    asTypes = asWords[1:]
    oInstr.sMnemonic = asWords[0].lower()
    if not asTypes or oInstr.aoOperands:
        return True

    for sType in asTypes:
        if sType not in g_kdOpTypes:
            # Unknown operand type; deliberately not reported via self.error().
            return False
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType))
    return True
1728
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by deriving whatever was not
    specified from what was, and registering the result.

    In order: records iLine as the completion line, guesses the mnemonic
    (and operands) from the stats or function name, derives the disassembler
    enum, the stats name and the function name, applies the default map,
    adds the instruction to all its maps, derives the encoding, and enters
    the instruction into the by-stats and by-function global dictionaries.
    A duplicate stats name is reported via self.error().

    Must only be called once per instruction (asserted).  Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Placeholder: neither kind of instruction gets special treatment yet.
    if oInstr.cOpTags > 0:
        pass;   # Specified instructions.
    else:
        pass;   # Unspecified legacy stuff, where there is generally only an
                # 'Opcode 0x0f 0x00 /0.' comment and the FNIEMOPRM_DEF name.

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from the stats name, falling back on the
    # function name with its 'iemOp_' prefix removed.
    if oInstr.sMnemonic is None:
        sNameSource = oInstr.sStats;
        if sNameSource is None and oInstr.sFunction is not None:
            sNameSource = oInstr.sFunction.replace('iemOp_', '');
        if sNameSource is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sNameSource);

    # The disassembler op enum constant follows from the mnemonic.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name: function name sans prefix, or else the
    # mnemonic followed by the operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            asParts = [oInstr.sMnemonic,] + [oOp.sType for oOp in oInstr.aoOperands if oOp.sType];
            oInstr.sStats = '_'.join(asParts);

    # The IEM function name: mnemonic followed by the operand types, or else
    # the stats name with the 'iemOp_' prefix.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asParts = ['iemOp_' + oInstr.sMnemonic,] + [oOp.sType for oOp in oInstr.aoOperands if oOp.sType];
            oInstr.sFunction = '_'.join(asParts);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from the operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                sBaseEncoding = 'ModR/M';
            else:
                sBaseEncoding = 'fixed';
            oInstr.sEncoding = ('VEX.' + sBaseEncoding) if oInstr.onlyInVexMaps() else sBaseEncoding;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to the function indexed dictionary, which allows multiple
    # instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
1834
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets for the next one.

    Each current instruction is passed to doneInstructionOne().  The
    completion line is self.iLine, unless iLineInComment is given, in which
    case it is self.iCommentLine + iLineInComment.  The stub and instruction
    totals are updated, then the comment text and the current instruction
    list are cleared.

    Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset the per-instruction parser state.
    self.aoCurInstrs = [];
    self.sComment    = '';
    return True;
1849
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all the current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes that currently are
    None get replaced; any other existing value (empty strings included) is
    left alone.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1861
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue.

    Arrays that are too short are first padded with None up to and including
    iEntry.  Unless fOverwrite is exactly True, only entries that currently
    are None get replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoValues = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None,] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
1873
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, everything after 'Opcode' is recorded
    as the 'sRawOldOpcodes' attribute of the current instructions (without
    overwriting an existing value).  Upper-case '0X' prefixes are normalized
    to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1889
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    Adds a new instruction (at self.iCommentLine + iTagLine) when there is
    no current one.  The tag count of every current instruction is then
    bumped, and self.cTotalTagged is incremented for each instruction that
    just received its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        cTagsNow = oInstr.cOpTags + 1;
        oInstr.cOpTags = cTagsNow;
        if cTagsNow == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1899
1900 @staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into single strings.

    Each line is stripped and the lines of a section are joined by a single
    space.  Empty sections are skipped.

    Returns a list of strings, one string per non-empty section.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
1911
1912 @staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with sSectionSep (newline
    by default) as section breaks.

    Each line is stripped and the lines of a section are joined by sLineSep.
    Empty sections are skipped entirely, so they contribute neither text nor
    a separator, wherever they occur.  The result has no trailing section
    separator.

    Returns the flattened string, which is empty when there are no non-empty
    sections.
    """
    # Joining only the non-empty sections keeps a leading empty section from
    # producing a stray separator at the start of the result.
    asFlatSections = [sLineSep.join([sLine.strip() for sLine in asLines])
                      for asLines in aasSections if asLines];
    return sSectionSep.join(asFlatSections);
1929
1930
1931
1932 ## @name Tag parsers
1933 ## @{
1934
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The flattened value must be non-empty, at most 180 characters including
    the terminating full stop (which is added when missing), and consist of
    a single sentence.  An existing brief is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot that ends a sentence, i.e. one followed by a space
    # or the end of the string.  It must be the very last character.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                 % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1964
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to the description sections of
    the instruction.

    Returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
1979
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string (it is checked
    against self.oReMnemonic).  Because of prefixes, groups and whatnot,
    there are times when the mnemonic isn't that of an actual assembler
    mnemonic.

    An existing mnemonic is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace a mnemonic that is already set.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                 % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
2002
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It is validated against g_kdOpLocations.
    When omitted, the location is taken from the g_kdOpTypes entry for the
    type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It is
    validated against g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand that is already set is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                 % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                 % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Missing
    # lower numbered operands are padded with None.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                 % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                     sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2052
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Map names are separated by commas, line breaks or section breaks.
    Whitespace around the names and empty entries are ignored.  Every name
    must be a key of g_dInstructionMaps, and must not already be assigned to
    the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so empty entries are filtered out here to
    # detect a missing value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',') if sMap.strip()];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                     % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2087
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None, 'none' as is.
    Anything else must be a valid opcode byte (a bare two character value
    gets a '0x' prefix first) and, like 'none', be a key of g_kdPrefixes.
    An existing prefix is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2125
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: | mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' digit is the three bit ModR/M reg field and must therefore be
    in the range 0 thru 7.  An existing opcode is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  It is either one of the /reg forms or
    # a plain opcode byte.
    sOpcode = self.flattenAllSections(aasSections);
    fRegForm = sOpcode[:-1] in ('/', '11/', '!11/') and sOpcode[-1] in '01234567';
    if not fRegForm and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2155
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes; the first element
    of the matching entry is what gets stored.  An existing sub opcode is
    never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and translate the value.
    sRawSubOpcode = self.flattenAllSections(aasSections);
    if sRawSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawSubOpcode,));
    sNewSubOpcode = g_kdSubOpcodes[sRawSubOpcode][0];

    # Refuse to replace a sub opcode that is already set.
    if oInstr.sSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                 % ( sTag, oInstr.sSubOpcode, sNewSubOpcode,));

    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
2181
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The flattened value is accepted when it is a key of g_kdEncodings, a key
    of g_dInstructionMaps, or a valid opcode byte.  An existing encoding is
    never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    if     sNewEncoding not in g_kdEncodings \
       and sNewEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Refuse to replace an encoding that is already set.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                 % ( sTag, oInstr.sEncoding, sNewEncoding,));

    oInstr.sEncoding = sNewEncoding;
    return True;
2208
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags looks up the tag here and then reads (getattr) and
## writes (setattr) the named attribute on the instruction.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
2217
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics (lines and
    sections are joined by commas too), or the single word 'none' for an
    empty list.  Each entry must be a key of g_kdEFlagsMnemonics, possibly
    after stripping whitespace.  All invalid entries are reported before
    giving up.

    The resulting list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag.  An existing (not None)
    value is never overwritten.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fOkay:
            return False;

    # Set them, refusing to replace an existing list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
2249
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    The single word 'none' means an empty list.  Otherwise every hint must
    be a key of g_kdHints; all invalid hints are reported before giving up.
    Valid hints are added to the dHints dictionary of the instruction, and
    duplicates are reported but otherwise ignored.

    Returns True on success, False when one or more hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never yields entries with leading
    #       or trailing whitespace, so no further stripping is needed.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2283
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    Exactly one word is expected.  When more are given, an error is
    reported and the first word is used.  The word must match
    self.oReDisEnum.  An existing enum value is never overwritten.

    Returns True on success, False when the value is missing, and otherwise
    the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    cValues = len(asValues);
    if cValues != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
    if cValues == 0:
        return False;

    # Validate the first (and normally only) word.
    sNewDisEnum = asValues[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an enum value that is already set.
    if oInstr.sDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewDisEnum,));

    oInstr.sDisEnum = sNewDisEnum;
    return True;
2312
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    Exactly one CPU name is expected.  When more are given, an error is
    reported and the first name is used.  The name must be a key of
    g_kdCpuNames.  Specifying the same name again is harmless, while an
    attempt to change an already set name is reported and otherwise ignored.

    Returns True on success (and on the overwrite attempt), otherwise the
    result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, validate it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Nothing is assigned before this point, so that the overwrite
    # check below sees the previous value.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2342
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list, where the single word
    'none' means an empty list.  Every entry must be a key of
    g_kdCpuIdFlags; all invalid entries are reported before giving up.
    Valid entries are appended to the asCpuIds list of the instruction, and
    duplicates are reported but otherwise ignored.

    Returns True on success, False when one or more entries are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never yields entries with leading
    #       or trailing whitespace, so no further stripping is needed.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2376
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is required.  An
    existing group is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    # Validate the name.
    (sNewGroup,) = asGroups;
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                 % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Refuse to replace a group that is already set.
    if oInstr.sGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
2402
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, which must be a key of g_kdInvalidStyles, is
    required.  Only one of the three tags may be used per instruction, as an
    existing style is never overwritten.  Besides storing the style, the
    \@opunused tag sets fUnused and the \@opinvalid tag sets fInvalid.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                 % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace a style that is already set.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2440
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  Its words are sorted into
    selectors, inputs and outputs using the '/' and '->' separator words,
    then validated against the TestSelector and TestInOut tables.  Tests
    that parse cleanly are appended to the aoTests list of the instruction;
    for the others the problems are reported via self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs first, then inputs
        # after seeing '->', and finally selectors after seeing '/'.
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list is compared by
            # identity, since the lists may well have equal content (like
            # when they are all still empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands before looking for the compare operator.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # Only the first operator found in the item is considered.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                         if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2585
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the number of tests the instruction
    already has, reporting a mismatch via self.errorComment().  The value is
    then handed to parseTagOpTest() regardless of the outcome.

    Returns what parseTagOpTest() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows the 7 character tag name, optionally in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2601
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: none
    of the arguments are looked at and nothing is recorded.

    See also \@oponlytest.

    Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2613
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid references are only recorded here, in the asCopyTests list of the
    instruction; the copying is left for later so that forward references
    can be handled.  Invalid and duplicate references are reported and
    skipped.

    Returns True, except when no reference is given, in which case the
    result of self.errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value, insisting on at least one reference.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    # Validate and record the references one by one.
    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
        elif self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                              % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2642
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Restricts testing to the instructions carrying this tag, which is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True, or the errorComment result if a value was supplied.
    """
    _ = iEndLine  # unused
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The tag takes no value; anything but whitespace is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip()
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected))

    # Register the instruction once in the only-test list.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr)
    return True
2665
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value must be given, it must be a key of g_kdXcptTypes, and
    the instruction must not already have an exception type.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    # Set it, refusing to overwrite an earlier value.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
2692
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation following the description, or it
    is generated from the mnemonic and operands.

    Setting it by hand is thought to be useful for instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine  # unused
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The flattened value must look like a function name.
    sName = self.flattenAllSections(aasSections)
    if not self.oReFunctionName.match(sName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReFunctionName.pattern))

    # Only the first assignment is accepted.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, oInstr.sFunction, sName,))

    oInstr.sFunction = sName
    return True
2720
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    Setting it by hand is thought to be useful for instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine  # unused
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The flattened value must look like a statistics name.
    sName = self.flattenAllSections(aasSections)
    if not self.oReStatsName.match(sName):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern))

    # Only the first assignment is accepted.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, oInstr.sStats, sName,))

    oInstr.sStats = sName
    return True
2748
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions specified so far.

    Returns the doneInstructions result, or the errorComment result when a
    value was supplied.
    """
    _ = iEndLine  # unused
    sValue = self.flattenAllSections(aasSections)
    if sValue == '':
        return self.doneInstructions()
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,))
2761
2762 ## @}
2763
2764
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines with leading asterisks and blanks
    removed.  An old style 'Opcode ...' first line is handed to
    parseCommentOldOpcode.  The lines are then grouped by '@tag' and each
    tag is dispatched to its handler in self.dTagHandlers together with its
    sections (runs of non-empty lines separated by empty lines).

    Returns False if the comment contains neither 'Opcode' nor '@',
    otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment
    if 'Opcode' not in sComment and '@' not in sComment:
        return False

    #
    # Split into lines, strip the comment decoration, and trim empty lines
    # at both ends.  The comment line counter follows the leading trim.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')]

    while asLines and not asLines[0]:
        self.iCommentLine += 1
        del asLines[0]

    while asLines and not asLines[-1]:
        del asLines[-1]

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines)

    #
    # Look for @op* tagged data.  Text before the first tag is collected
    # under the pseudo tag '@default'.
    #
    cOpTags = 0
    sFlatDefault = None
    sCurTag = '@default'
    iCurTagLine = 0
    asCurSection = []
    aasSections = [asCurSection]
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # Process the previous tag (dropping a trailing empty section).
            #
            if len(aasSections) > 1 and not asCurSection:
                aasSections.pop()
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine)
                cOpTags += 1
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag))
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections)
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag))
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,))

            #
            # New tag; the rest of the line starts its first section.
            #
            asSplit = sLine.split(None, 1)
            sCurTag = asSplit[0].lower()
            asCurSection = [asSplit[1]] if len(asSplit) > 1 else []
            aasSections = [asCurSection]
            iCurTagLine = iLine
        elif sLine:
            asCurSection.append(sLine)
        elif asCurSection:
            # An empty line closes a non-empty section.
            asCurSection = []
            aasSections.append(asCurSection)

    #
    # Process the final tag.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop()
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine)
        cOpTags += 1
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag))
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections)

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,))

    return True
2865
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  When the closing parenthesis
    is not found in it, following lines from self.asLines (starting at
    self.iLine) are appended until it is.

    Returns a tuple: the first element is the offset following the macro
    invocation, the second is the argument list with the macro name as
    element zero.  Problems are reported through self.raiseError and
    self.error.
    """
    # First the name.
    offOpen = sInvocation.find('(')
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found")
    sName = sInvocation[:offOpen].strip()
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,))
    asRet = [sName]

    # Then the arguments: split on top-level commas, tracking nesting depth.
    iNextLine = self.iLine
    cDepth = 1
    off = offOpen + 1
    offArg = off
    while cDepth > 0:
        if off >= len(sInvocation):
            # Ran out of text; pull in the next source line.
            if iNextLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file')
            sInvocation += self.asLines[iNextLine]
            iNextLine += 1
        ch = sInvocation[off]

        if ch == '(':
            cDepth += 1
        elif ch in (',', ')'):
            if cDepth == 1:
                asRet.append(sInvocation[offArg:off].strip())
                offArg = off + 1
            if ch == ')':
                cDepth -= 1
        off += 1

    return (off, asRet)
2907
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed (ignoring whitespace) by '('.

    Returns (len(sCode), None) if not found, otherwise the
    parseMacroInvocation result with the offset made relative to sCode.
    """
    offHit = sCode.find(sMacro);
    # startswith() rather than indexing, so a macro name with nothing after
    # it in sCode counts as 'not found' instead of raising IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2917
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro)
    return asArgs
2923
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazily probe each macro so nothing is parsed after the first hit.
    aoCandidates = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro)
    return next((asRet for asRet in aoCandidates if asRet is not None), None)
2933
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what the @op* tags have
    already set on the last current instruction (mnemonic, operands,
    encoding, stats); attributes not yet set are filled in from the macro.
    Mismatches are reported via self.error.  The hints are merged into the
    instruction's hint dictionary.

    Nothing beyond the parameter checks is done when sIemHints contains
    IEMOPHINT_SKIP_PYTHON.

    Returns True.
    """
    _ = sAsm  # not used

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,))
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,))
    if sLower != sUpper.lower():
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,))
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,))

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction()
    oInstr = self.aoCurInstrs[-1]
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,))
    else:
        oInstr.iLineMnemonicMacro = self.iLine

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,))

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),))
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1]
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType))
            # Error recovery: fall back on the @opN value, or a dummy operand.
            if iOperand < len(oInstr.aoOperands):
                oExisting = oInstr.aoOperands[iOperand]
                sWhere = oExisting.sWhere
                sType = oExisting.sType
            else:
                sWhere = 'reg'
                sType = 'Gb'
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,))

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,))
    else:
        oForm = g_kdIemForms[sForm]
        if oInstr.sEncoding is None:
            oInstr.sEncoding = oForm[0]
        elif oForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, oForm, sForm))

        # Check the parameter locations for the encoding.
        asFormWhere = oForm[1]
        if asFormWhere is not None:
            if len(asFormWhere) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWhere), len(oInstr.aoOperands) ))
            else:
                for iOperand, sWhere in enumerate(asFormWhere):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,))

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,))
    elif oInstr.sStats is None:
        oInstr.sStats = sStats
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,))

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler hints are handled first, then the IEM ones.
    for sHints, sPrefix, sFmtUnknown, sFmtExpected in (
            (sDisHints, 'DISOPTYPE_', '%s: unknown a_fDisHints value: %s', '%s: expected a_fDisHints value: %s'),
            (sIemHints, 'IEMOPHINT_', '%s: unknown a_fIemHints value: %s', '%s: expected a_fIemHints value: %s'),
    ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip()
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower()
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True  # (dummy value, using dictionary for speed)
                else:
                    self.error(sFmtUnknown % (sMacro, sHint,))
            elif sHint != '0':
                self.error(sFmtExpected % (sMacro, sHint,))

    return True
3051
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the explicit stats and assembly arguments of the EX variants,
    so both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands)
        sAsm = sLower + ' ' + ','.join(asOperands)
    else:
        sStats = sLower
        sAsm = sLower
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands)
3061
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True when a FNIEMOP_XXX function definition/stub macro was
    found and processed, False otherwise (including when one of the
    IEMOP_MNEMONIC* macros was processed).
    """
    #
    # Scan macro invocations.  Nothing to do without an opening parenthesis
    # somewhere after the first character.
    #
    if sCode.find('(') <= 0:
        return False

    # Look for instruction decoder function definitions. ASSUME single line.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ])
    if asArgs is not None:
        sFunction = asArgs[1]

        if not self.aoCurInstrs:
            self.addInstruction()
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) )
        self.setInstrunctionAttrib('sFunction', sFunction)
        self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True)
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True)
        # A stub has no body to scan, so the instructions are complete now.
        if asArgs[0].find('STUB') > 0:
            self.doneInstructions()
        return True

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC')
    if asArgs is not None:
        if len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0]
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1]
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1])

    # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,]
    #                     a_fDisHints, a_fIemHints)            for n = 0..4
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,))
        if asArgs is not None:
            iHints = 6 + cOperands  # the operands occupy asArgs[6:iHints]
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints])

    # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,] a_fDisHints, a_fIemHints)
    #                                                          for n = 0..4
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,))
        if asArgs is not None:
            iHints = 4 + cOperands  # the operands occupy asArgs[4:iHints]
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints])

    return False
3155
3156
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine, switching between the code state
    and the multi-line comment state.  Code fragments are handed to
    checkCodeForMacro, completed comments to parseComment, and a '}' in the
    first column completes the current instructions.

    Returns number of errors (the printErrors result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith() so that an empty line string cannot raise IndexError.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3223
3224
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Reads sSrcFile into a list of lines and runs a SimpleParser over it with
    sDefaultMap as the default instruction map.

    Returns the error count from the parser.
    Raises Exception if the file cannot be opened or read.  A
    ParserException is printed and re-raised; other parser exceptions
    propagate unchanged.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r")
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,))
    with oFile:  # closes the file whether or not the read succeeds
        try:
            asLines = oFile.readlines()
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,))

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse()
    except ParserException as oXcpt:
        print(str(oXcpt))
        raise
3255
3256
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is looked up first by statistics name and then by
    function name.  The tests of every source instruction found are
    appended to the destination instruction.  References that cannot be
    resolved, or that resolve to the destination itself, are reported on
    stderr.

    Returns the number of errors.
    """
    asErrors = []
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue
        for sSrcInstr in oDstInstr.asCopyTests:
            # A statistics name identifies one instruction; a function name
            # may map to several.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None)
            if oByStat:
                aoSrcInstrs = [oByStat]
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, [])

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))
                continue

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests)
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))

    if asErrors:
        sys.stderr.write(u''.join(asErrors))
    return len(asErrors)
3284
3285
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (an error count, for the benefit of the caller's summing).
    """
    if not g_aoOnlyTestInstructions:
        return 0
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = []
    return 0
3297
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files found next to this script,
    then executes the test copying jobs and applies the only-test filter.

    Returns True on success.  Exits the process with status 1 if any errors
    were counted.  Raises exception on failure.
    """
    # (default map, file name) pairs, processed in this order.
    aasInputs = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    )

    sSrcDir = os.path.dirname(os.path.abspath(__file__))
    cErrors = 0
    for sDefaultMap, sName in aasInputs:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap)
    cErrors += __doTestCopying()
    cErrors += __applyOnlyTest()

    if cErrors != 0:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1)
    return True
3324
3325
3326
# Parse all the instruction files as soon as the module is loaded.
__parseAll()
3328
3329
3330#
3331# Generators (may perhaps move later).
3332#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Writes a C table of DISOPCODE entries to oDstFile.  The instruction maps
    are ordered by encoding and lead opcodes, but for now only the first
    map is emitted.

    NOTE(review): the spacing inside the string literals is reproduced as it
    appears in the reviewed copy of this function - confirm it against the
    repository version, since the generated layout depends on it.
    """
    # Start columns for the fields of an OP()/OPVEX() line.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199]

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName
        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ]

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # Header comment (preceded by an empty line) for every 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('')
                asLines.append(' /* %x */' % (iInstr >> 4,))

            if oInstr is None:
                continue  # Nothing is emitted for empty slots.
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr))
                continue

            sMacro = 'OP'
            cMaxOperands = 3
            if len(oInstr.aoOperands) > 3:
                sMacro = 'OPVEX'
                cMaxOperands = 4
                assert len(oInstr.aoOperands) <= cMaxOperands

            #
            # Format string.
            #
            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,)
            for iOperand, oOperand in enumerate(oInstr.aoOperands):
                sFormat += ' ' if iOperand == 0 else ','
                sOpFmt = g_kdOpTypes[oOperand.sType][2]
                if sOpFmt[0] != '%':        ## @todo remove upper() later.
                    sFormat += sOpFmt.upper()   ## @todo remove upper() later.
                else:
                    sFormat += sOpFmt
            sFormat += '",'
            asColumns = [ sFormat, ]

            #
            # Decoders.
            #
            iStart = len(asColumns)
            if oInstr.sEncoding is None:
                pass
            elif oInstr.sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM()
                asColumns.append('IDX_ParseModRM,')
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif oInstr.sEncoding in [ 'prefix', ]:
                asColumns.extend(['0,'] * len(oInstr.aoOperands))
            elif oInstr.sEncoding in [ 'fixed' ]:
                pass
            elif oInstr.sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,')
            elif oInstr.sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,')
            elif oInstr.sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',')
            else:
                ## @todo
                #IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
                #IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
                #IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
                #IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
                #IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
                assert False, str(oInstr)

            # Check for immediates and stuff in the remaining operands.
            for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0]
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',')
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)))

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr)
            asColumns.append(oInstr.sDisEnum + ',')
            iStart = len(asColumns)
            for oOperand in oInstr.aoOperands:
                asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',')
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)))

            #
            # Flags: the DISOPTYPE_ defines of the hints, in sorted hint order.
            #
            asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())]
            sFlags = ' | '.join([sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')])
            asColumns.append((sFlags if sFlags else '0') + '),')

            #
            # Format the columns into a line, padding each field to its
            # start column, or using a single space if already past it.
            #
            sLine = ''
            for iColumn, sColumn in enumerate(asColumns):
                cchPad = aoffColumns[iColumn] - len(sLine)
                sLine += ' ' * cchPad if cchPad > 0 else ' '
                sLine += sColumn

            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            # DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

            asLines.append(sLine)

        asLines.append('};')
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),))

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines))
        oDstFile.write('\n')
        break #for now
3496
# When run as a script, write the disassembler tables to standard output.
if __name__ == "__main__":
    generateDisassemblerTables()
3499
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette