VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66972

Last change on this file since 66972 was 66966, checked in by vboxsync, 8 years ago

IEM: Implemented vmovlps Mq,Vq (VEX.0F 13 mod!=3).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 144.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66966 2017-05-19 09:49:59Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66966 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type; alias it to 'int' so that code in this
# file calling long() works on both major versions.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS constants: X86_EFL_XXX name -> integer value.
## The trailing comments give the corresponding C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant (a key of
## g_kdX86EFlagsConstants).  A leading '!' on the constant name marks the
## mnemonic as meaning "flag is clear" rather than "flag is set".
##
## NOTE(review): 'po' maps to PF set and 'pe' to PF clear here, while x86 sets
##               PF on even parity - this looks inverted; confirm before
##               relying on these two mnemonics.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
## X86_CR0_XXX name -> integer value; the trailing comments give the
## corresponding C expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':           0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':           0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':           0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':           0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':           0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':           0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':           0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':           0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':           0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':           0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## X86_CR4_XXX name -> integer value; the trailing comments give the
## corresponding C expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':          0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':          0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':           0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':          0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':          0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':          0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':          0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':          0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':       0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT':  0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':         0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':         0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':        0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':      0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':         0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':         0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':          0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## XSAVE_C_XXX name -> integer value.  The two XSAVE_C_ALL_XXX entries are
## combined masks of the single-bit entries above them.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          0x00000001,
    'XSAVE_C_SSE':          0x00000002,
    'XSAVE_C_YMM':          0x00000004,
    'XSAVE_C_BNDREGS':      0x00000008,
    'XSAVE_C_BNDCSR':       0x00000010,
    'XSAVE_C_OPMASK':       0x00000020,
    'XSAVE_C_ZMM_HI256':    0x00000040,
    'XSAVE_C_ZMM_16HI':     0x00000080,
    'XSAVE_C_PKRU':         0x00000200,
    'XSAVE_C_LWP':          0x4000000000000000,
    'XSAVE_C_X':            0x8000000000000000,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI).
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits (the above | SSE).
};
178
179
## \@op[1-4] locations
## Only the keys carry information in this table; every value is an empty list.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
200
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! Several keys differ only by a suffix (_RO, _WO, _RW, Zx, Hi) and share
##       the handler, location and format string of the unsuffixed type.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      ),

    # VEX.vvvv
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
316
317# IDX_ParseFixedReg
318# IDX_ParseVexDest
319
320
## IEMFORM_XXX mappings.
## Maps the form name (without the IEMFORM_ prefix) to a tuple holding the
## encoding (a key of g_kdEncodings) and the list of operand locations (keys of
## g_kdOpLocations) in operand order.  'FIXED' has None instead of a list.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
354
## \@oppfx values.
## Only the keys carry information in this table; every value is an empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
362
## Special \@opcode tag values.
## Only the keys carry information in this table; every value is an empty list.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
372
## Special \@opcodesub tag values.
## The first list entry is the canonical spelling, so the short aliases map to
## the same string as the long forms; 'none' maps to None.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};
381
## Valid values for \@openc
## The first list entry is a BS3CG1ENC_XXX constant name, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
389
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys carry information in this table; every value is an empty list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
398
## CPU names.
## Only the keys carry information in this table; every value is an empty tuple.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
406
## \@opcpuid
## Maps the feature mnemonic to the name of an X86_CPUID_XXX constant.
##
## NOTE(review): 'pclmul', 'monitor' and 'smx' map to constants named after
##               other features (DTES64, CPLDS and TM2) - these look like
##               off-by-one-row mistakes; confirm against the X86_CPUID_XXX
##               definitions before relying on them.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_DTES64',     # NOTE(review): see above.
    'monitor':      'X86_CPUID_FEATURE_ECX_CPLDS',      # NOTE(review): see above.
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_TM2',        # NOTE(review): see above.
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
451
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name.  The last four hints
## map to an empty string, i.e. they have no DISOPTYPE_XXX constant here.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes (66h + REX.W).
    'ignores_vex_l':         '',                                ##< Ignores VEX.L.
    'vex_l_zero':            '',                                ##< VEX.L must be 0.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
488
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys carry information in this table; every value is an empty list.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '5LZ':      [], # LZ = VEX.L must be zero.
    '6':        [],
    '7':        [],
    '7LZ':      [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
524
525
526def _isValidOpcodeByte(sOpcode):
527 """
528 Checks if sOpcode is a valid lower case opcode byte.
529 Returns true/false.
530 """
531 if len(sOpcode) == 4:
532 if sOpcode[:2] == '0x':
533 if sOpcode[2] in '0123456789abcdef':
534 if sOpcode[3] in '0123456789abcdef':
535 return True;
536 return False;
537
538
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values (only the keys are used).
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty instruction map.

        sName is the map name, asLeadOpcodes the list of lead opcode bytes as
        lower case hex strings (None means no lead bytes), sSelector a key of
        kdSelectors, sEncoding a key of kdEncodings and sDisParse either None
        or a string starting with 'IDX_Parse'.

        Invalid arguments trigger assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries. This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  Asserts that the byte is consistent with the
        selector (full '0xNN' opcode for 'byte', modrm.mod for the '11' and
        '!11' variants).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # modrm.mod lives in bits 7:6, so mod == 0y11 means both mask bits are
        # set, i.e. the masked value is 0xc0 (it can never equal 0xc).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.

        Instructions with an empty sOpcode are left out of the table.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        the map name: 'm', 'r' and two digit lower case hex words are appended
        with a leading underscore, all other words are capitalized.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if not sWord:           # Empty component (e.g. doubled underscore), nothing to add.
                continue;
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;


    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
684
685
class TestType(object):
    """
    Test value type.

    The base class handles integer like values given as decimal or
    hexadecimal ('0x' prefixed) strings.  The fUnsigned constructor parameter
    selects whether values are zero extended (True) or sign extended (False)
    by default.  A value prefixed with '+' or '-' is always sign extended,
    regardless of fUnsigned.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32];    # Normal sizes.
        self.sName     = sName;
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Raised by get() when the value string cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Converts sValue to the shortest byte representation that has one of
        the normal sizes (acbSizes), least significant byte first.  Values too
        large for the biggest normal size are padded to a multiple of it.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation only produces the first form.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts the '0x' check by one.
        fSignExtend = not self.fUnsigned;
        offDigits   = 0;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            offDigits   = 1;
        fHex = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Convert the string to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  A negative value is turned into its two's
        # complement digits by formatting -n - 1 and inverting every digit.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;  # Make sure the top bit is set.

        # Figure out the natural number of digits.
        cDigits      = len(sHex);
        cMaxDigits   = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = cMaxDigits;
            cNaturalDigits = int((cDigits + cNaturalDigits - 1) / cNaturalDigits) * cNaturalDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad with the sign digit.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('f' if fNegative else '0') * cNeeded) + sHex;

        # Walk the digit pairs from the end to get little endian byte order.
        abValue = bytearray();
        for offHex in range(len(sHex), 0, -2):
            abValue.append(int(sHex[offHex - 2 : offHex], 16));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Checks whether get() accepts sValue.
        Returns True if it does, the BadValue message string if not.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is an AND+OR pair.  Never the case for this base
        type, so the value is ignored and False returned.
        """
        _ = sValue;
        return False;
810
811
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of flag mnemonics, see
    g_kdEFlagsMnemonics.
    """

    ## The mnemonics that stand for a cleared flag (only the keys are used).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics to byte values.

        Returns (aoSet,) when only set-flags are given, and (aoClear, aoSet)
        when at least one clear-flag mnemonic is present; each entry is a
        (fSignExtend, bytearray) tuple as returned by TestType.get.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains a clear-flag mnemonic and get() thus returns
        an AND+OR pair.

        Whole mnemonics are compared: a substring search would mistake 'iopl'
        for containing 'pl' and make the consistency assertions in get() fail.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
847
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are described by a
    constant-name to value dictionary (used for CR0, CR4 and XCR0 in this
    file).

    The value is a comma separated list of bit names; each name is upper cased
    and prefixed with sConstantPrefix before being looked up in
    kdConstantsAndValues.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        ORs together the values of all the bit names in sValue and returns the
        result in the TestType.get format.

        Raises BadValue on unknown names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
868
869
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance is a (field, operator, value, type) quadruple of strings.  The
    field must be a key of kdFields and the operator one of kasOperators.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Legacy misspelling of 'xmm15.dw0' (underscore instead of dot).  Kept
        # as an alias so anything using the old name keeps validating.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        # Check the types first so that a non-string (possibly unhashable)
        # argument trips an assertion rather than the dictionary lookup.
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;   ##< Key in kdFields.
        self.sOp    = sOp;      ##< One of kasOperators.
        self.sValue = sValue;   ##< The value as a string.
        self.sType  = sType;    ##< The value type name as a string.
1135
1136
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet, where the
    variable is a key of kdVariables, the operator is one of kasCompareOps and
    the value is a key of the sub-dictionary of that variable.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1230
1231
1232class InstructionTest(object):
1233 """
1234 Instruction test.
1235 """
1236
1237 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1238 self.oInstr = oInstr; # type: InstructionTest
1239 self.aoInputs = []; # type: list(TestInOut)
1240 self.aoOutputs = []; # type: list(TestInOut)
1241 self.aoSelectors = []; # type: list(TestSelector)
1242
1243 def toString(self, fRepr = False):
1244 """
1245 Converts it to string representation.
1246 """
1247 asWords = [];
1248 if self.aoSelectors:
1249 for oSelector in self.aoSelectors:
1250 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1251 asWords.append('/');
1252
1253 for oModifier in self.aoInputs:
1254 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1255
1256 asWords.append('->');
1257
1258 for oModifier in self.aoOutputs:
1259 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1260
1261 if fRepr:
1262 return '<' + ' '.join(asWords) + '>';
1263 return ' '.join(asWords);
1264
1265 def __str__(self):
1266 """ Provide string represenation. """
1267 return self.toString(False);
1268
1269 def __repr__(self):
1270 """ Provide unambigious string representation. """
1271 return self.toString(True);
1272
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (key in g_kdOpLocations) with an operand type
    (key in g_kdOpTypes).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;    ##< g_kdOpTypes
        self.sWhere = sWhere;   ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if the operand type starts with 'E', 'G' or 'M', i.e. uses some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1287
1288
1289
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Plain data holder for everything collected about one instruction, plus a
    few helpers for turning it into a string, decoding the opcode byte spec
    and converting the EFLAGS lists into integer masks.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs.  Fields
        whose value is None are left out, and a number of fields are only
        included when they are set/non-empty.  When fRepr is True the result
        is wrapped in angle brackets.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # Anything longer than the optional leading '<' means a field
                # has already been added and a separator is needed.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms:
            - '0xNN':  Full hex byte, returned as is.
            - '/r':    ModR/M reg value r, returns r << 3.
            - '11/r':  Same with mod=3, returns (r << 3) | 0xc0.
            - '!11/r': Same with mod=1 (see todo below), returns (r << 3) | 0x80.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form:
        # The canonical spelling is two characters ('/3').  The four character
        # variant is what this method originally (and exclusively) accepted and
        # is retained for backwards compatibility.
        if len(sOpcode) in (2, 4) and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated to a constant name via g_kdEFlagsMnemonics and
        then to a value via g_kdX86EFlagsConstants.  None and empty lists
        yield zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1472
1473
1474
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
## Indexed by map name; the name is repeated as the first InstructionMap argument.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Opcode 0x80 is a ModR/M.reg selected group just like 0x81, 0x82 and 0x83.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1547
1548
1549
class ParserException(Exception):
    """ Exception carrying a single parser error message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1554
1555
1556class SimpleParser(object):
1557 """
1558 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1559 """
1560
1561 ## @name Parser state.
1562 ## @{
1563 kiCode = 0;
1564 kiCommentMulti = 1;
1565 ## @}
1566
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state for one source file.

    sSrcFile is the source file name used in messages, asLines the lines of
    the file and sDefaultMap the name of the g_dInstructionMaps entry to use
    as the default map.
    """
    # Input and position.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;

    # Parser state.
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Counters.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Regular expressions; macro names, mnemonics and statistics names all
    # use the same plain identifier pattern.
    sReIdentifier = '^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sReIdentifier);
    self.oReMnemonic    = re.compile(sReIdentifier);
    self.oReStatsName   = re.compile(sReIdentifier);
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name to handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, in both the '@optestN' and the '@optest[N]' spelling.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    self.asErrors       = [];
1633
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1639
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException like raiseError does, except that the reported
    line number is self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
1645
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1653
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error in self.asErrors for a line inside the current comment,
    the line number being self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
1661
def printErrors(self):
    """
    Writes the collected errors (if any) to stderr in one go.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if self.asErrors:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1670
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix, but only if self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1677
1678
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the current instruction
    list and the global g_aoAllInstructions list.

    The instruction is created for line iLine, or for self.iLine when iLine
    is None.  Returns the new instruction.
    """
    iLineCreated = iLine if iLine is not None else self.iLine;
    oNewInstr    = Instruction(self.sSrcFile, iLineCreated);
    for aoList in (g_aoAllInstructions, self.aoCurInstrs):
        aoList.append(oNewInstr);
    return oNewInstr;
1687
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done if the instruction already has a mnemonic.  Otherwise
    sStats is split on underscores: the first word (lower-cased) becomes the
    mnemonic and, provided the instruction has no operands yet, each
    remaining word is looked up in g_kdOpTypes and added as an operand.

    Returns False when hitting a word that is not a known operand type
    (operands added before it are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asWords = sStats.split('_');
    oInstr.sMnemonic = asWords[0].lower();

    asOperandTypes = asWords[1:];
    if asOperandTypes and not oInstr.aoOperands:
        for sType in asOperandTypes:
            if sType not in g_kdOpTypes:
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1703
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in attributes that are still
    unset (mnemonic, operands, disassembler enum, stats name, function name,
    maps and encoding) and registers the instruction with its maps and with
    the global by-stat and by-function dictionaries.

    A duplicate stats name is reported via self.error() but does not change
    the return value.  Always returns True.
    """
    # Each instruction may only be completed once.
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The function name, with the 'iemOp_' part removed, is used when there is no stats name.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    # The function name takes precedence over the mnemonic when available.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types.
    # Falls back on the stats name when there is no mnemonic.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    # Note: The encoding is left as None when there are operands and the
    #       first one does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction using a stats name keeps the dictionary entry.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1809
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the parser state.

    Each instruction in self.aoCurInstrs is handed to doneInstructionOne()
    together with the line it was completed on: self.iLine when
    iLineInComment is None, otherwise self.iCommentLine + iLineInComment.
    The stub and instruction totals are updated, then the current comment
    and the current instruction list are cleared.

    Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineCompleted = self.iLine;
        else:
            iLineCompleted = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineCompleted);

        # Keep the stub statistics up to date.
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for whatever comes next.
    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
1824
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to the sAttrib attribute of every current instruction.

    Unless fOverwrite is exactly True, instructions whose attribute already
    holds something other than None are left untouched.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        # Only look at the old value when we are not forcing the update.
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1836
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib of every
    current instruction.

    The list is padded with None entries as needed so that iEntry exists.
    Unless fOverwrite is exactly True, an entry that already holds something
    other than None is left untouched.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);

        # Grow the array so the requested entry can be addressed.
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);

        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1848
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, the rest of that line (everything after
    'Opcode', with '0X' prefixes normalized to '0x') is stored as the
    sRawOldOpcodes attribute of the current instructions via
    setInstrunctionAttrib(), i.e. without overwriting an existing value.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if      len(asWords) >= 2 \
        and asWords[0] == 'Opcode' \
        and asWords[1].startswith(('0x', '0X')):
        # Drop the 'Opcode' keyword, keeping the opcode bytes and what follows.
        asWords = asWords[1:];

        # Normalize upper case hex prefixes, keeping the digits as they are.
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];

        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
1864
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added for the line
    self.iCommentLine + iTagLine.  The op-tag count of every current
    instruction is then bumped, and self.cTotalTagged is incremented for
    each instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        iLine = self.iCommentLine + iTagLine;
        self.addInstruction(iLine);

    for oCurInstr in self.aoCurInstrs:
        cOpTags = oCurInstr.cOpTags + 1;
        oCurInstr.cOpTags = cOpTags;
        if cOpTags == 1:
            # First tag for this instruction, so count it as tagged.
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1874
1875 @staticmethod
1876 def flattenSections(aasSections):
1877 """
1878 Flattens multiline sections into stripped single strings.
1879 Returns list of strings, on section per string.
1880 """
1881 asRet = [];
1882 for asLines in aasSections:
1883 if asLines:
1884 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1885 return asRet;
1886
1887 @staticmethod
1888 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1889 """
1890 Flattens sections into a simple stripped string with newlines as
1891 section breaks. The final section does not sport a trailing newline.
1892 """
1893 # Typical: One section with a single line.
1894 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1895 return aasSections[0][0].strip();
1896
1897 sRet = '';
1898 for iSection, asLines in enumerate(aasSections):
1899 if asLines:
1900 if iSection > 0:
1901 sRet += sSectionSep;
1902 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1903 return sRet;
1904
1905
1906
1907 ## @name Tag parsers
1908 ## @{
1909
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The flattened value gets a terminating full stop if it lacks one, must
    not exceed 180 characters and must consist of a single sentence.  It is
    stored as the sBrief attribute of the instruction, which must not
    already have a brief description.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first full stop that ends a sentence, i.e. one followed by
    # a space or sitting at the very end.  It must be the final character.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1939
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each flattened section is
    appended to the asDescSections list of the instruction.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if not aasSections:
        return True;

    oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
1954
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    the mnemonic op-tag
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    The instruction must not already have a mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check that it looks like a mnemonic.
    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
        return self.errorComment(iTagLine, sError);

    # Refuse to replace an existing mnemonic.
    if oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sMnemonic;
    return True;
1977
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default location of the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand that has already been specified cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        # The second entry of the type table row is the default location.
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2027
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key of g_dInstructionMaps.  Whitespace around
    the names and empty list entries are ignored.  Maps the instruction is
    already assigned to are rejected.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Lines and sections are
    # joined with commas, so blank entries can appear and must be dropped.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2062
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive and exactly one is required.  'n/a' is
    stored as None, 'none' is stored as is, and a two character value gets
    a '0x' prefix before being validated as an opcode byte.  Anything but
    'n/a' must be a key of g_kdPrefixes.  An already set prefix cannot be
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        # Accept the bare two digit form by supplying the hex prefix.
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2100
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value is either something _isValidOpcodeByte() accepts, or a
    '/reg', '11/reg' or '!11/reg' specifier where reg is a single digit in
    the range 0 thru 7.  An already set opcode cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif sOpcode[:-1] in ('/', '11/', '!11/') and sOpcode[-1] in '01234567':
        # The ModR/M reg field is three bits wide, so 7 is the highest value.
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2130
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  one of the keys of g_kdSubOpcodes

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The value is translated via g_kdSubOpcodes before it is stored.  An
    already set sub-opcode cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and look it up.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        sError = '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,);
        return self.errorComment(iTagLine, sError);
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Store it unless there is one already.
    if oInstr.sSubOpcode is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sSubOpcode, sSubOpcode,);
        return self.errorComment(iTagLine, sError);

    oInstr.sSubOpcode = sSubOpcode;
    return True;
2156
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or something _isValidOpcodeByte() accepts.  An
    already set encoding cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value, checking the cheap tables first.
    sEncoding = self.flattenAllSections(aasSections);
    fKnown = sEncoding in g_kdEncodings \
          or sEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Store it unless there is one already.
    if oInstr.sEncoding is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sEncoding, sEncoding,);
        return self.errorComment(iTagLine, sError);

    oInstr.sEncoding = sEncoding;
    return True;
2183
## Maps each EFLAGS op-tag to the name of the instruction attribute that
## parseTagOpEFlags stores the parsed flag list in.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
2192
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or the
    single word 'none' (any case) for an empty list.  The list is stored in
    the instruction attribute self.kdOpFlagToAttr gives for the tag, which
    must still be None.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];

    # Validate the flags, reporting all the bad ones before giving up.
    fRc = True;
    for iFlag, sFlag in enumerate(asFlags):
        if sFlag in g_kdEFlagsMnemonics:
            continue;
        sStripped = sFlag.strip();
        if sStripped in g_kdEFlagsMnemonics:
            asFlags[iFlag] = sStripped;
        else:
            fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
    if not fRc:
        return False;

    # Store them unless the attribute has been set already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2224
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' (any
    case) means no hints.  Valid hints are added to the dHints dictionary
    of the instruction.  A hint that is already present is reported but
    does not fail the tag.

    Returns True on success, False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() without arguments never yields words with surrounding
    #       whitespace, so the words can be looked up as they are.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # Report all the invalid hints before bailing out.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2258
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    Exactly one word is expected.  When there are more, an error is
    reported and the first word is used.  The word must match
    self.oReDisEnum.  An already set enum value cannot be overwritten.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split into words.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the (first) word.
    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        sError = '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)' % (sTag, sDisEnum, self.oReDisEnum.pattern);
        return self.errorComment(iTagLine, sError);

    # Store it unless there is one already.
    if oInstr.sDisEnum is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,);
        return self.errorComment(iTagLine, sError);

    oInstr.sDisEnum = sDisEnum;
    return True;
2287
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    A value is required and must be a key of g_kdCpuNames.  When more than
    one word is given, an error is reported and the first word is used.  A
    conflicting value for an instruction that already has a minimum CPU is
    reported and the existing value is kept.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The instruction must not be touched before this point, or the
    # conflict check below cannot see the previous value.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2317
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys, or
    the single word 'none' (any case) for no flags.  Valid flags are
    appended to the asCpuIds list of the instruction.  A flag that is
    already present is reported but does not fail the tag.

    Returns True on success, False if any of the flags are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() without arguments never yields words with surrounding
    #       whitespace, so the words can be looked up as they are.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Report all the invalid flags before bailing out.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2351
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is required.  An
    already set group cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and split it into words, of which there must be one.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    # Validate the name.
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        sError = '%s: invalid group name: %s  (valid: %s)' % (sTag, sGroup, self.oReGroupName.pattern);
        return self.errorComment(iTagLine, sError);

    # Store it unless there is one already.
    if oInstr.sGroup is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,);
        return self.errorComment(iTagLine, sError);

    oInstr.sGroup = sGroup;
    return True;
2377
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused tag indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid tag indicates the specification is for an invalid
    instruction encoding (like UD2).

    The @opinvlstyle tag just indicates how CPUs decode the instruction
    when not supported (@opcpuid, @opmincpu) or disabled.

    Exactly one style, a key of g_kdInvalidStyles, is required, and only one
    of these tags can be applied to an instruction.  Besides storing the
    style, @opunused sets fUnused and @opinvalid sets fInvalid.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and split it into words, of which there must be one.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));

    # Validate the style.
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        sError = '%s: invalid invalid behaviour style: %s  (valid: %s)' % (sTag, sStyle, g_kdInvalidStyles.keys(),);
        return self.errorComment(iTagLine, sError);

    # Store it unless there is one already.
    if oInstr.sInvalidStyle is not None:
        sError = '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)' \
               % ( sTag, oInstr.sInvalidStyle, sStyle,);
        return self.errorComment(iTagLine, sError);
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags also flag the instruction.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
2415
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value describes one test.  The words of a section
    are sorted into selectors, inputs and outputs using the '/' and '->'
    separators, each of which may occur once only.  The selectors and the
    input/output assignments are then validated against the TestSelector
    and TestInOut tables.  The test is appended to the aoTests list of the
    instruction unless a parse failure was recorded for the section.

    All problems are reported via self.errorComment().  Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();

        # Walk the words backwards: outputs come last, selectors first.  The
        # list currently being filled is tracked by identity, since the
        # lists can easily have equal contents (they all start out empty).
        for sWord in reversed(asWords):
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand for full 'variable<op>value' conditions.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition is considered.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the type comes from the field table.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is considered.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if it parsed fine, otherwise dump
        # what we made of it to help with locating the problem.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2560
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is expected to equal the number of tests the instruction
    already has.  A mismatch is reported, but the test is parsed regardless.

    Returns whatever parseTagOpTest() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the test number, skipping the brackets if present.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));

    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2576
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.

    See also @oponlytest.

    Returns True without doing anything.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2588
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the asCopyTests list of the instruction.  Invalid and
    duplicate references are reported and skipped.

    Returns True, unless no reference is given, in which case the result of
    self.errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.  The references are only recorded
    # here, they are not resolved by this method.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;

        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            sError = '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)' \
                   % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern);
            self.errorComment(iTagLine, sError);

    return True;
2617
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already there.  The tag does not take a value.

    See also @optestignore.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There shall be no value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list, avoiding duplicates.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2640
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type, a key of g_kdXcptTypes, is required.  An already set
    exception type cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s  (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    # Set it.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
2667
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation following the description, or it
    is generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name.
    sName = self.flattenAllSections(aasSections);
    fValid = bool(self.oReFunctionName.match(sName));
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReFunctionName.pattern));

    # Only the first name sticks; a second one is reported as an error.
    if oCurInstr.sFunction is None:
        oCurInstr.sFunction = sName;
        return True;

    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oCurInstr.sFunction, sName,));
2695
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name.
    sName = self.flattenAllSections(aasSections);
    fValid = bool(self.oReStatsName.match(sName));
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern));

    # Only the first name sticks; a second one is reported as an error.
    if oCurInstr.sStats is None:
        oCurInstr.sStats = sName;
        return True;

    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oCurInstr.sStats, sName,));
2723
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions() when no value was given,
    otherwise the result of self.errorComment().
    """
    _ = iEndLine;

    # Only a completely empty value is acceptable for this tag.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2736
2737 ## @}
2738
2739
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, comment decoration (leading blanks and
    asterisks) is stripped, and the text is divided into tags: everything
    before the first line starting with '@' belongs to the implicit
    '@default' tag, and each line starting with '@' opens a new tag.  Blank
    lines split the text of a tag into sections.  Tags found in
    self.dTagHandlers are passed to their handler; unknown '@op*' tags and
    likely misspellings are reported via self.errorComment().

    Returns False if the comment holds neither 'Opcode' nor '@' (it is then
    ignored), True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while asLines and not asLines[0]:
        self.iCommentLine += 1;     # Keep the comment line number in sync with asLines[0].
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Tag processing shared by the loop below and the final tag, so that
    # every tag gets exactly the same treatment and diagnostics.  The
    # results are accumulated in dState (a dictionary, as a nested function
    # cannot rebind the locals of the enclosing one in older Pythons).
    #
    dState = { 'cOpTags': 0, 'sFlatDefault': None, };

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """ Dispatches one completed tag; updates dState. """
        # Drop a trailing empty section (left behind by a blank line).
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            dState['cOpTags'] += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            dState['sFlatDefault'] = self.flattenAllSections(aasTagSections);
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));

    #
    # Look for @op* tagged data.
    #
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # Blank line after some text: start a new section.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            processTag(sCurTag, aasSections, iCurTagLine, iLine);

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag (iLine is the index of the last line here).
    #
    processTag(sCurTag, aasSections, iCurTagLine, iLine);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if dState['cOpTags'] > 0 and dState['sFlatDefault']:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (dState['sFlatDefault'],));

    return True;
2840
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  If the closing parenthesis is
    not found in it, following lines from self.asLines (starting at
    self.iLine) are appended until it is.

    Returns a tuple: the first element is the offset following the macro
    invocation, the second is a list of the macro arguments with the macro
    name as the zero'th element.  On a bad macro name or when running out
    of lines, the result of self.error() is returned instead.
    """
    # The name is everything up to the opening parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Walk the argument list character by character, tracking the
    # parenthesis nesting so only top-level commas split arguments.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Out of text: pull in the next source line.
            if iNextLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chCur == '(':
            cNesting += 1;
        elif chCur in (',', ')'):
            if cNesting == 1:
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asArgs);
2882
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if the next
    non-blank character after it is an opening parenthesis, parses it as a
    macro invocation.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # Note: lstrip + startswith rather than indexing, as the macro name may
    #       be the last thing in sCode (nothing left to index into).
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2892
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2898
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in the given order and stops at the first
    one that findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so macros after the first hit are never tried.
    oHits = (self.findAndParseMacroInvocation(sCode, sCandidate) for sCandidate in asMacro);
    return next((asHit for asHit in oHits if asHit is not None), None);
2908
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what has already been
    recorded for the last instruction in self.aoCurInstrs (one is added if
    the list is empty), and attributes not yet set are filled in from the
    macro.  All problems are reported via self.error() without aborting.

    Parameters:
        sMacro      - The macro name, only used for prefixing error messages.
        sStats      - The a_Stats argument (statistics name).
        sAsm        - The a_szMnemonic argument; currently unused.
        sForm       - The a_Form argument, a key into g_kdIemForms.
        sUpper      - The a_Upper argument (upper-cased mnemonic).
        sLower      - The a_Lower argument (lower-cased mnemonic).
        sDisHints   - The a_fDisHints argument, '|' separated DISOPTYPE_XXX
                      values or '0'.
        sIemHints   - The a_fIemHints argument, '|' separated IEMOPHINT_XXX
                      values or '0'.
        asOperands  - List of the a_OpN arguments, keys into g_kdOpTypes.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # iLineMnemonicMacro is -1 until the first mnemonic macro has been seen.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    # The instruction must either have no operands yet or the same number as the macro.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # The operand location is the 2nd entry of the g_kdOpTypes value.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        # Either add the operand (next free slot) or compare with the existing one.
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # The first entry of the g_kdIemForms value is the encoding.
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        # (The second entry is the list of expected operand locations, or None.)
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            # Hints are keyed by the lower-cased name without the prefix.
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));


    _ = sAsm;
    return True;
3026
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros lack the statistics name and assembly string arguments of
    the EX variants, so both are derived from the lower-cased mnemonic and
    the operands before handing over to workerIemOpMnemonicEx.

    Returns the result of workerIemOpMnemonicEx.
    """
    if asOperands:
        # E.g. 'add' with ['Gb', 'Eb'] gives 'add_Gb_Eb' and 'add Gb,Eb'.
        sStats = '_'.join([sLower,] + list(asOperands));
        sAsm   = '%s %s' % (sLower, ','.join(asOperands),);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3036
def checkCodeForMacro(self, sCode):
    """
    Checks a piece of code for the macro invocations this parser cares
    about: the FNIEMOP_XXX function definition macros and the
    IEMOP_MNEMONIC family.

    Returns True if a FNIEMOP_XXX macro was found and processed, False
    otherwise (also when only mnemonic macros were processed).
    """
    # A macro invocation needs an opening parenthesis after the first character.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asHit = self.findAndParseFirstMacroInvocation(sCode,
                                                  [ 'FNIEMOP_DEF',
                                                    'FNIEMOP_STUB',
                                                    'FNIEMOP_STUB_1',
                                                    'FNIEMOP_UD_STUB',
                                                    'FNIEMOP_UD_STUB_1' ]);
    if asHit is not None:
        sFunction  = asHit[1];
        sMacroName = asHit[0];
        fIsStub    = sMacroName.find('STUB') > 0;

        if not self.aoCurInstrs:
            self.addInstruction();
        # Each current instruction may only see one FNIEMOP_XXX macro.
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacroName, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacroName.find('UD_STUB') > 0, fOverwrite = True);
        # A stub has no body to scan, so the instructions are complete now.
        if fIsStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asHit = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asHit is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asHit[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asHit[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    # The N operands sit between a_Lower (index 5) and the two hint arguments.
    #
    for cOperands in range(5):
        asHit = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asHit is not None:
            self.workerIemOpMnemonicEx(asHit[0], asHit[1], asHit[2], asHit[3], asHit[4], asHit[5],
                                       asHit[6 + cOperands], asHit[7 + cOperands], asHit[6 : 6 + cOperands]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    # The N operands sit between a_Lower (index 3) and the two hint arguments.
    #
    for cOperands in range(5):
        asHit = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asHit is not None:
            self.workerIemOpMnemonic(asHit[0], asHit[1], asHit[2], asHit[3],
                                     asHit[4 + cOperands], asHit[5 + cOperands], asHit[4 : 4 + cOperands]);

    return False;
3130
3131
def parse(self):
    """
    Parses the given file.

    Walks self.asLines one line at a time, switching between the code
    state (self.kiCode) and the multi-line comment state
    (self.kiCommentMulti).  Code is handed to checkCodeForMacro(), and the
    text of each completed /* ... */ comment is collected in self.sComment
    and handed to parseComment().

    Returns number of errors (the result of self.printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine  += 1;   # Note: self.iLine now refers to the line after sLine.

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Take the scanning path unless the first slash starts a C++ line
            # comment while we are in the code state.
            if offSlash + 1 >= len(sLine)  or  sLine[offSlash + 1] != '/'  or  self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: parse it and go back to code state.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            # The comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                # Only the code in front of the comment is of interest.
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Complete whatever is still pending at the end of the file and report.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3198
3199
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Reads one source file and parses it for instruction specifications.

    Returns the result of SimpleParser.parse(), which the caller adds to
    its error count.
    Raises Exception if the file cannot be opened or read.  A
    ParserException is printed before being passed on; any other exception
    from the parser is passed on as-is.
    """
    #
    # Slurp the whole file into a list of lines.  Opening and reading are
    # kept apart since they produce different error messages.
    #
    try:
        oSrcFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oSrcFile:
        try:
            asSrcLines = oSrcFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asSrcLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3230
3231
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is looked up as a statistics name first and as a
    function name second; the tests of every instruction found are
    appended to those of the referencing instruction.  References that
    cannot be resolved, or that resolve to the referencing instruction
    itself, are reported on stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;

        for sReference in oTarget.asCopyTests:
            # Resolve the reference: a statistics name gives exactly one
            # instruction, a function name may give several.
            oByStat = g_dAllInstructionsByStat.get(sReference, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sReference, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3259
3260
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list is empty nothing happens, otherwise aoTests is emptied
    for every other instruction.

    Always returns 0 (i.e. contributes no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
3272
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files found next to this
    script, then executes the test copying and only-test post processing.

    Returns True if no errors were counted, otherwise the process is
    terminated with exit code 1.
    Raises exception on failure.
    """
    # (default map name, source file name) pairs, parsed in this order.
    aasInputs = [
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    ];

    sScriptDir  = os.path.dirname(os.path.abspath(__file__));
    cProblems   = 0;
    for sMapName, sFileName in aasInputs:
        cProblems += __parseFileByName(os.path.join(sScriptDir, sFileName), sMapName);

    # Post processing that needs all the instructions to be known.
    cProblems += __doTestCopying();
    cProblems += __applyOnlyTest();

    # The errors have been reported already, so just exit with a failure status.
    if cProblems != 0:
        sys.exit(1);
    return True;
3299
3300
3301
# Parse all the instruction source files as soon as this module is loaded,
# whether it is imported or run as a script.
__parseAll();
3303
3304
3305#
3306# Generators (may perhaps move later).
3307#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are visited sorted by
    encoding and lead opcode bytes, and for each a C 'const DISOPCODE'
    array definition is written to oDstFile (an object with a write()
    method, sys.stdout by default), followed by an AssertCompile on the
    table size.

    Note! Only the first map in that order is generated for now, see the
          break at the end of the loop.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # The minimum start offset of each column in a table entry line.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start each group of 16 entries with a comment giving the high nibble.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Several instructions share the table slot; only a placeholder is emitted.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # The OP macro takes up to three operands, OPVEX up to four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    # The encoding names another instruction map; use that map's parser.
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (Operands already covered by a decoder column above are skipped.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns with zeros up to the macro's operand count.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the parameter columns with OP_PARM_NONE up to the macro's operand count.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints that map to DISOPTYPE_XXX defines are included, OR'ed together.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                sLine = '';
                for i, s in enumerate(asColumns):
                    # Pad up to the column offset, or separate with a single
                    # space if the line is already at or past it.
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3471
# When run as a script rather than imported, write the disassembler tables
# to the default destination of generateDisassemblerTables (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3474
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette