VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66935

Last change on this file since 66935 was 66935, checked in by vboxsync, 8 years ago

IEM: Implemented vmovlpd Vq,Hq,Mq (VEX.66.0F 12 mod!=3).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 144.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66935 2017-05-17 12:09:30Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66935 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
if sys.version_info[0] >= 3:
    # Python 3 has no separate 'long' type.  Alias it to int so the
    # long(...) conversions further down in this module keep working.
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
55g_kdX86EFlagsConstants = {
56 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
57 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
58 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
59 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
60 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
61 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
62 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
63 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
64 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
65 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
66 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
67 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
68 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
69 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
70 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
71 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
72 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
73 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
74 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
75 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
76};
77
78## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
g_kdEFlagsMnemonics = {
    # Maps a flag mnemonic to the name of a g_kdX86EFlagsConstants entry.
    # A leading '!' on the constant name marks the mnemonic as meaning
    # "flag is clear" rather than "flag is set".

    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): x86 sets PF for an even number of low-byte bits, and
    # DEBUG-style notation usually shows 'pe' when PF=1.  This table maps
    # 'po' to PF set - confirm that this polarity is intended.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
125## Constants and values for CR0.
126g_kdX86Cr0Constants = {
127 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
128 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
129 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
130 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
131 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
132 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
133 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
134 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
135 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
136 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
137 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
138};
139
140## Constants and values for CR4.
141g_kdX86Cr4Constants = {
142 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
143 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
144 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
145 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
146 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
147 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
148 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
149 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
150 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
151 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
152 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
153 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
154 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
155 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
156 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
157 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
158 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
159 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
160};
161
162## XSAVE components (XCR0).
163g_kdX86XSaveCConstants = {
164 'XSAVE_C_X87': 0x00000001,
165 'XSAVE_C_SSE': 0x00000002,
166 'XSAVE_C_YMM': 0x00000004,
167 'XSAVE_C_BNDREGS': 0x00000008,
168 'XSAVE_C_BNDCSR': 0x00000010,
169 'XSAVE_C_OPMASK': 0x00000020,
170 'XSAVE_C_ZMM_HI256': 0x00000040,
171 'XSAVE_C_ZMM_16HI': 0x00000080,
172 'XSAVE_C_PKRU': 0x00000200,
173 'XSAVE_C_LWP': 0x4000000000000000,
174 'XSAVE_C_X': 0x8000000000000000,
175 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
176 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
177};
178
179
180## \@op[1-4] locations
181g_kdOpLocations = {
182 'reg': [], ## modrm.reg
183 'rm': [], ## modrm.rm
184 'imm': [], ## immediate instruction data
185 'vvvv': [], ## VEX.vvvv
186
187 # fixed registers.
188 'AL': [],
189 'rAX': [],
190 'rSI': [],
191 'rDI': [],
192 'rFLAGS': [],
193 'CS': [],
194 'DS': [],
195 'ES': [],
196 'FS': [],
197 'GS': [],
198 'SS': [],
199};
200
201## \@op[1-4] types
202##
203## Value fields:
204## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
205## - 1: the location (g_kdOpLocations).
206## - 2: disassembler format string version of the type.
207## - 3: disassembler OP_PARAM_XXX (XXX only).
208##
209## Note! See the A.2.1 in SDM vol 2 for the type names.
210g_kdOpTypes = {
211 # Fixed addresses
212 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),
213
214 # ModR/M.rm
215 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
216 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
217 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
218 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
219 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
220 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
221 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
222 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
223 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
224 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
225 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
226 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
227 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
228 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
229 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
230
231 # ModR/M.rm - register only.
232 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
233 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
234 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
235 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
236 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
237 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
238 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', ),
239
240 # ModR/M.rm - memory only.
241 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
242 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
243 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
244 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
245 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
246 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
247 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
248 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
249 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
250
251 # ModR/M.reg
252 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
253 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
254 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
255 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
256 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', ),
257 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
258 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
259 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
260 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
261 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
262 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
263 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
264 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
265 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
266 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
267 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
268 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
269 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ),
270 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
271 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
272 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', ),
273
274 # VEX.vvvv
275 'HdqCss': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HdqCss', ),
276 'HdqCsd': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HdqCsd', ),
277 'HdqCq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HdqCq', ),
278 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', ),
279
280 # Immediate values.
281 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
282 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
283 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
284 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
285 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
286 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword
287
288 # Address operands (no ModR/M).
289 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
290 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),
291
292 # Relative jump targets
293 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
294 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),
295
296 # DS:rSI
297 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
298 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
299 # ES:rDI
300 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
301 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),
302
303 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),
304
305 # Fixed registers.
306 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
307 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
308 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
309 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
310 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
311 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
312 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
313 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
314};
315
316# IDX_ParseFixedReg
317# IDX_ParseVexDest
318
319
320## IEMFORM_XXX mappings.
321g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
322 'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
323 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
324 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
325 'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
326 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
327 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
328 'M': ( 'ModR/M', [ 'rm', ], ),
329 'M_REG': ( 'ModR/M', [ 'rm', ], ),
330 'M_MEM': ( 'ModR/M', [ 'rm', ], ),
331 'R': ( 'ModR/M', [ 'reg', ], ),
332
333 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
334 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
335 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
336 'VEX_XM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
337 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
338 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
339 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
340 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
341 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
342 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
343 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
344 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
345 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
346 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
347 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
348 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
349 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
350
351 'FIXED': ( 'fixed', None, )
352};
353
354## \@oppfx values.
355g_kdPrefixes = {
356 'none': [],
357 '0x66': [],
358 '0xf3': [],
359 '0xf2': [],
360};
361
362## Special \@opcode tag values.
363g_kdSpecialOpcodes = {
364 '/reg': [],
365 'mr/reg': [],
366 '11 /reg': [],
367 '!11 /reg': [],
368 '11 mr/reg': [],
369 '!11 mr/reg': [],
370};
371
372## Special \@opcodesub tag values.
373g_kdSubOpcodes = {
374 'none': [ None, ],
375 '11 mr/reg': [ '11 mr/reg', ],
376 '11': [ '11 mr/reg', ], ##< alias
377 '!11 mr/reg': [ '!11 mr/reg', ],
378 '!11': [ '!11 mr/reg', ], ##< alias
379};
380
381## Valid values for \@openc
382g_kdEncodings = {
383 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
384 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
385 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc).
386 'prefix': [ None, ], ##< Prefix
387};
388
389## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    # Only the keys carry information here; every value is an empty list.
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
397
398g_kdCpuNames = {
399 '8086': (),
400 '80186': (),
401 '80286': (),
402 '80386': (),
403 '80486': (),
404};
405
406## \@opcpuid
g_kdCpuIdFlags = {
    # Maps the lower case feature mnemonic used with \@opcpuid to the name of
    # the matching X86_CPUID_XXX feature bit constant.
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the constant of a neighbouring feature bit
    # (DTES64, CPLDS and TM2 respectively) instead of their own.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
450
451## \@ophints values.
452g_kdHints = {
453 'invalid': 'DISOPTYPE_INVALID', ##<
454 'harmless': 'DISOPTYPE_HARMLESS', ##<
455 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
456 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
457 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
458 'portio': 'DISOPTYPE_PORTIO', ##<
459 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
460 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
461 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
462 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
463 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
464 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
465 'illegal': 'DISOPTYPE_ILLEGAL', ##<
466 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
467 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
468 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss) */
469 'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
470 'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
471 'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
472 'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
473 'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
474 'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
475 'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
476 'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
477 'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
478 ## (only in 16 & 32 bits mode!)
479 'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
480 'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
481 'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
482 'ignores_op_size': '', ##< Ignores both operand size prefixes.
483 'lock_allowed': '', ##< Lock prefix allowed.
484};
485
486## \@opxcpttype values (see SDMv2 2.4, 2.7).
487g_kdXcptTypes = {
488 'none': [],
489 '1': [],
490 '2': [],
491 '3': [],
492 '4': [],
493 '4UA': [],
494 '5': [],
495 '5LZ': [], # LZ = VEX.L must be zero.
496 '6': [],
497 '7': [],
498 '7LZ': [],
499 '8': [],
500 '11': [],
501 '12': [],
502 'E1': [],
503 'E1NF': [],
504 'E2': [],
505 'E3': [],
506 'E3NF': [],
507 'E4': [],
508 'E4NF': [],
509 'E5': [],
510 'E5NF': [],
511 'E6': [],
512 'E6NF': [],
513 'E7NF': [],
514 'E9': [],
515 'E9NF': [],
516 'E10': [],
517 'E11': [],
518 'E12': [],
519 'E12NF': [],
520};
521
522
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode is an opcode byte written as '0x' followed by
    exactly two lower case hexadecimal digits.
    Returns True if it is, False if not.
    """
    sHexDigits = '0123456789abcdef';
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;
    return sOpcode[2] in sHexDigits and sOpcode[3] in sHexDigits;
534
535
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): 'vvvvv' below presumably refers to the map select field
    ## of the VEX/XOP prefix (mmmmm in the reference manuals) - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [  256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [    8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[   16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [   64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the list of lead opcode bytes as
        '0xNN' strings (None means no lead bytes), sSelector a key of
        kdSelectors, sEncoding a key of kdEncodings, and sDisParse either None
        or a string starting with 'IDX_Parse'.  Invalid arguments trigger
        assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  Asserts if the opcode byte does not fit the
        selector (e.g. modrm.mod != 3 for the '11' selectors).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors need masking and shifting.  modrm.mod is bits
        # 7:6 of the byte, so mod == 3 is (bOpcode & 0xc0) == 0xc0.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: turn the entry into a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' and two digit hex words are appended with a leading
        underscore, other words are appended capitalized.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if not sWord:                   # Empty word (doubled, leading or trailing '_'), nothing to add.
                continue;
            if sWord == 'm':                # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':              # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
681
682
class TestType(object):
    """
    Test value type.

    Handles integer like values.  The fUnsigned constructor parameter is the
    default stance on zero vs sign extending.  A value prefixed by '+' or '-'
    is always marked for sign extending, whatever fUnsigned says.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32]; # Normal sizes.
        self.sName     = sName;
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        class only produces the first form.  The bytearray holds the least
        significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # A sign prefix forces sign extension and moves the '0x' prefix by one.
        fHasSign    = sValue[0] in ('-', '+');
        fSignExtend = fHasSign or not self.fUnsigned;
        offPrefix   = 1 if fHasSign else 0;
        fHex        = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits.  A negative value n is converted as ~(-n - 1),
        # doing the inversion digit by digit on the hex string.
        fNegative = iValue < 0;
        sHex = hex((-iValue - 1) if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex; # Make sure the top bit is set.

        # Find the natural size: the first normal size that fits, or else a
        # multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad to the natural size, using 'f' for negative values and '0' otherwise.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('f' if fNegative else '0') * cNeeded) + sHex;

        # Walk the digit pairs from the end so the least significant byte comes first.
        abValue = bytearray(int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for this base class.
        """
        _ = sValue;
        return False;
807
808
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated mnemonics from g_kdEFlagsMnemonics.  Mnemonics
    that mean "flag clear" turn the value into an AND+OR pair.
    """

    ## The mnemonics meaning "flag clear", i.e. those g_kdEFlagsMnemonics maps
    ## to a '!' prefixed constant.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Returns what TestType.get returns for the mask of flags to set when
        no "flag clear" mnemonic is present.  Otherwise returns a pair of
        (fSignExtend, bytearray) entries: first the mask of flags to clear,
        then the mask of flags to set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. contains a "flag clear" mnemonic.

        Whole comma separated flags are compared, the same way get() splits
        the value.  A substring search would take 'iopl' for a pair because
        it contains 'pl'.
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
844
class TestTypeFromDict(TestType):
    """
    Value parsing for comma separated flag names that are looked up in a
    dictionary of constants, after upper casing and prefixing them.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;        ##< Prepended to the upper cased flag name.
        self.kdConstantsAndValues = kdConstantsAndValues;   ##< Constant name -> integer value.

    def get(self, sValue):
        """
        ORs together the values of all the flags named in sValue and returns
        the TestType.get representation of the result.

        Raises BadValue if a flag is not found in the dictionary.
        """
        fValue = 0;
        for sFlag in sValue.split(','):
            sConstant  = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant, None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fValue |= fFlagValue;
        sHexValue = '0x%x' % (fValue,);
        return TestType.get(self, sHexValue);
865
866
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance is just the (field, operator, value, type) quadruple; the
    class attributes below list the operators, value types and field names
    that are recognized.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Legacy misspelling of 'xmm15.dw0' (underscore instead of dot); only
        # kept so anything still using the old name keeps being accepted.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Stores the modifier.

        sField must be a key of kdFields and sOp one of kasOperators, and all
        four arguments must be strings.  This is checked with assertions only.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1132
1133
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.  The class
    attributes list the accepted operators, the variables with their values,
    and shorthand predicates that map to variable expressions.
    """

    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16': 'size_o16',
            'o32': 'size_o32',
            'o64': 'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64': 'code_64bit',
            '32': 'code_32bit',
            '16': 'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':  'paging_on',
            'off': 'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };

    ## Selector shorthand predicates.
    ## Each of these translates into a variable expression.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the selector after asserting that sVariable is a key of
        kdVariables, sOp is in kasCompareOps and sValue is valid for sVariable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1222
1223
1224class InstructionTest(object):
1225 """
1226 Instruction test.
1227 """
1228
1229 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1230 self.oInstr = oInstr; # type: InstructionTest
1231 self.aoInputs = []; # type: list(TestInOut)
1232 self.aoOutputs = []; # type: list(TestInOut)
1233 self.aoSelectors = []; # type: list(TestSelector)
1234
1235 def toString(self, fRepr = False):
1236 """
1237 Converts it to string representation.
1238 """
1239 asWords = [];
1240 if self.aoSelectors:
1241 for oSelector in self.aoSelectors:
1242 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1243 asWords.append('/');
1244
1245 for oModifier in self.aoInputs:
1246 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1247
1248 asWords.append('->');
1249
1250 for oModifier in self.aoOutputs:
1251 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1252
1253 if fRepr:
1254 return '<' + ' '.join(asWords) + '>';
1255 return ' '.join(asWords);
1256
1257 def __str__(self):
1258 """ Provide string represenation. """
1259 return self.toString(False);
1260
1261 def __repr__(self):
1262 """ Provide unambigious string representation. """
1263 return self.toString(True);
1264
class Operand(object):
    """
    Instruction operand.

    Pairs a location (key of g_kdOpLocations) with an operand type (key of
    g_kdOpTypes).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere, self.sType = sWhere, sType;    # Keys of g_kdOpLocations and g_kdOpTypes respectively.

    def usesModRM(self):
        """ Returns True if the operand type starts with 'E', 'G' or 'M' (some form of ModR/M encoding). """
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1279
1280
1281
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    A plain record describing one instruction plus a few helpers for
    formatting it and for converting the opcode and EFLAGS specifications
    into integers.  All attributes are initialized to "not specified"
    values by the constructor.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces '; ' separated 'name=value' pairs for the attributes that
        are set (fields with a None value are left out), wrapped in angle
        brackets when fRepr is true.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # More than one character means a field has already been
                # emitted (a lone '<' does not count), so separate them.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms, where N is the reg field value 0 thru 7:
            - '0xNN':  returned as is.
            - '/N':    N << 3.
            - '11/N':  (N << 3) | 0xc0.
            - '!11/N': (N << 3) | 0x80, i.e. mod=2 is picked.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The reg field is three bits wide.  Checking against the eight valid
        # digits (rather than isdigit) also keeps '8', '9' and non-ASCII
        # digits from getting past us.
        sRegDigits = '01234567';

        # The /r form ('/' + digit, so two characters):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in sRegDigits:
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (sets the 0x80 bit, i.e. mod=2):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated to a constant name via g_kdEFlagsMnemonics and
        then to a value via g_kdX86EFlagsConstants.  None and empty lists
        yield zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1464
1465
1466
## All the instructions, in the order they were added.
g_aoAllInstructions = list();           # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## One instruction per name.
g_dAllInstructionsByStat = dict();      # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## A list per name, as several instructions may share a function.
g_dAllInstructionsByFunction = dict();  # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = list();      # type: list(Instruction)
1478
## Instruction maps.
## Indexed by map name; the key always equals the name given to the map.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 opcode 0x80 is selected by ModR/M.reg just like its 0x81, 0x82
    # and 0x83 siblings, so it needs the '/r' selector too.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1539
1540
1541
class ParserException(Exception):
    """ Parser exception carrying a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1546
1547
1548class SimpleParser(object):
1549 """
1550 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1551 """
1552
    ## @name Parser state.
    ## Values for the iState attribute.
    ## @{
    kiCode              = 0;    ##< The state the constructor starts out in.
    kiCommentMulti      = 1;    ##< Presumably: inside a multi-line comment - confirm against the parsing loop.
    ## @}
1558
1559 def __init__(self, sSrcFile, asLines, sDefaultMap):
1560 self.sSrcFile = sSrcFile;
1561 self.asLines = asLines;
1562 self.iLine = 0;
1563 self.iState = self.kiCode;
1564 self.sComment = '';
1565 self.iCommentLine = 0;
1566 self.aoCurInstrs = [];
1567
1568 assert sDefaultMap in g_dInstructionMaps;
1569 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1570
1571 self.cTotalInstr = 0;
1572 self.cTotalStubs = 0;
1573 self.cTotalTagged = 0;
1574
1575 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1576 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1577 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1578 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1579 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1580 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1581 self.fDebug = True;
1582
1583 self.dTagHandlers = {
1584 '@opbrief': self.parseTagOpBrief,
1585 '@opdesc': self.parseTagOpDesc,
1586 '@opmnemonic': self.parseTagOpMnemonic,
1587 '@op1': self.parseTagOpOperandN,
1588 '@op2': self.parseTagOpOperandN,
1589 '@op3': self.parseTagOpOperandN,
1590 '@op4': self.parseTagOpOperandN,
1591 '@oppfx': self.parseTagOpPfx,
1592 '@opmaps': self.parseTagOpMaps,
1593 '@opcode': self.parseTagOpcode,
1594 '@opcodesub': self.parseTagOpcodeSub,
1595 '@openc': self.parseTagOpEnc,
1596 '@opfltest': self.parseTagOpEFlags,
1597 '@opflmodify': self.parseTagOpEFlags,
1598 '@opflundef': self.parseTagOpEFlags,
1599 '@opflset': self.parseTagOpEFlags,
1600 '@opflclear': self.parseTagOpEFlags,
1601 '@ophints': self.parseTagOpHints,
1602 '@opdisenum': self.parseTagOpDisEnum,
1603 '@opmincpu': self.parseTagOpMinCpu,
1604 '@opcpuid': self.parseTagOpCpuId,
1605 '@opgroup': self.parseTagOpGroup,
1606 '@opunused': self.parseTagOpUnusedInvalid,
1607 '@opinvalid': self.parseTagOpUnusedInvalid,
1608 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1609 '@optest': self.parseTagOpTest,
1610 '@optestign': self.parseTagOpTestIgnore,
1611 '@optestignore': self.parseTagOpTestIgnore,
1612 '@opcopytests': self.parseTagOpCopyTests,
1613 '@oponly': self.parseTagOpOnlyTest,
1614 '@oponlytest': self.parseTagOpOnlyTest,
1615 '@opxcpttype': self.parseTagOpXcptType,
1616 '@opstats': self.parseTagOpStats,
1617 '@opfunction': self.parseTagOpFunction,
1618 '@opdone': self.parseTagOpDone,
1619 };
1620 for i in range(48):
1621 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1622 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1623
1624 self.asErrors = [];
1625
1626 def raiseError(self, sMessage):
1627 """
1628 Raise error prefixed with the source and line number.
1629 """
1630 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1631
1632 def raiseCommentError(self, iLineInComment, sMessage):
1633 """
1634 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1635 """
1636 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1637
1638 def error(self, sMessage):
1639 """
1640 Adds an error.
1641 returns False;
1642 """
1643 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1644 return False;
1645
1646 def errorComment(self, iLineInComment, sMessage):
1647 """
1648 Adds a comment error.
1649 returns False;
1650 """
1651 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1652 return False;
1653
1654 def printErrors(self):
1655 """
1656 Print the errors to stderr.
1657 Returns number of errors.
1658 """
1659 if self.asErrors:
1660 sys.stderr.write(u''.join(self.asErrors));
1661 return len(self.asErrors);
1662
1663 def debug(self, sMessage):
1664 """
1665 For debugging.
1666 """
1667 if self.fDebug:
1668 print('debug: %s' % (sMessage,));
1669
1670
1671 def addInstruction(self, iLine = None):
1672 """
1673 Adds an instruction.
1674 """
1675 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1676 g_aoAllInstructions.append(oInstr);
1677 self.aoCurInstrs.append(oInstr);
1678 return oInstr;
1679
1680 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1681 """
1682 Derives the mnemonic and operands from a IEM stats base name like string.
1683 """
1684 if oInstr.sMnemonic is None:
1685 asWords = sStats.split('_');
1686 oInstr.sMnemonic = asWords[0].lower();
1687 if len(asWords) > 1 and not oInstr.aoOperands:
1688 for sType in asWords[1:]:
1689 if sType in g_kdOpTypes:
1690 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1691 else:
1692 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1693 return False;
1694 return True;
1695
    def doneInstructionOne(self, oInstr, iLine):
        """
        Complete the parsing by processing, validating and expanding raw inputs.

        Records iLine as the completion line of oInstr, fills in whichever of
        the mnemonic, disassembler enum, stats name, function name, maps and
        encoding are still unset, and registers the instruction with its maps
        and with the global by-stat and by-function dictionaries.  A stats
        name that is already registered is reported via self.error().

        The order of the steps matters as later derivations use the results
        of earlier ones.

        Returns True.
        """
        # Each instruction may only be completed once.
        assert oInstr.iLineCompleted is None;
        oInstr.iLineCompleted = iLine;

        #
        # Specified instructions.
        #
        if oInstr.cOpTags > 0:
            if oInstr.sStats is None:
                pass;   # Placeholder, nothing is done here.

        #
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        else:
            #if oInstr.sRawOldOpcodes:
            #
            #if oInstr.sMnemonic:
            pass;       # Placeholder, nothing is done here either.

        #
        # Common defaults.
        #

        # Guess mnemonic and operands from stats if the former is missing.
        # The stats name is preferred, the function name (minus the 'iemOp_'
        # part) is the fallback.  The return value is not checked.
        if oInstr.sMnemonic is None:
            if oInstr.sStats is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
            elif oInstr.sFunction is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

        # Derive the disassembler op enum constant from the mnemonic.
        if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
            oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

        # Derive the IEM statistics base name from the function name, or
        # failing that from the mnemonic and operand types.
        if oInstr.sStats is None:
            if oInstr.sFunction is not None:
                oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
            elif oInstr.sMnemonic is not None:
                oInstr.sStats = oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sStats += '_' + oOperand.sType;

        # Derive the IEM function name from mnemonic and operand types, or
        # failing that from the stats name.
        if oInstr.sFunction is None:
            if oInstr.sMnemonic is not None:
                oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sFunction += '_' + oOperand.sType;
            elif oInstr.sStats:
                oInstr.sFunction = 'iemOp_' + oInstr.sStats;

        #
        # Apply default map and then add the instruction to all its maps.
        #
        if not oInstr.aoMaps:
            oInstr.aoMaps = [ self.oDefaultMap, ];
        for oMap in oInstr.aoMaps:
            oMap.aoInstructions.append(oInstr);

        #
        # Derive encoding from operands and maps.
        #
        # Note that the encoding is left as None when there are operands and
        # the first one does not use ModR/M.
        #
        if oInstr.sEncoding is None:
            if not oInstr.aoOperands:
                if oInstr.fUnused and oInstr.sSubOpcode:
                    oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
                else:
                    oInstr.sEncoding = 'fixed';
            elif oInstr.aoOperands[0].usesModRM():
                if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                    or oInstr.onlyInVexMaps():
                    oInstr.sEncoding = 'VEX.ModR/M';
                else:
                    oInstr.sEncoding = 'ModR/M';

        #
        # Check the opstat value and add it to the opstat indexed dictionary.
        # The first instruction registered under a name is kept.
        #
        if oInstr.sStats:
            if oInstr.sStats not in g_dAllInstructionsByStat:
                g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
            else:
                self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                           % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

        #
        # Add to function indexed dictionary.  We allow multiple instructions per function.
        #
        if oInstr.sFunction:
            if oInstr.sFunction not in g_dAllInstructionsByFunction:
                g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
            else:
                g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

        #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
        return True;
1801
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the parser state.

    Every entry in self.aoCurInstrs is passed to doneInstructionOne() along
    with the completion line, which is self.iLine when iLineInComment is
    None and self.iCommentLine + iLineInComment otherwise.  The stub and
    instruction totals are updated, after which the pending comment text
    and the list of current instructions are cleared.

    Always returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineCompleted = self.iLine;
        else:
            iLineCompleted = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iLineCompleted);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset the state for the next instruction(s).
    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
1816
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all the current instructions to oValue.

    Unless fOverwrite is True, an instruction is only updated when its
    current attribute value is None; any other value (including an empty
    string) is left untouched.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
1828
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib to oValue for all the
    current instructions.

    The list is padded with None entries so that iEntry becomes a valid
    index.  Unless fOverwrite is True, the entry is only written when it
    currently is None.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);
        cMissing  = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
1840
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by
    a hex byte ('0x..' or '0X..'), everything after the 'Opcode' word is
    recorded as the sRawOldOpcodes attribute of the current instructions
    (via setInstrunctionAttrib, i.e. without overwriting an existing
    value).  Upper case '0X' prefixes are normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' word, keeping the opcode bytes and the rest.
        asWords = asWords[1:];
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
1856
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    If there are no current instructions, one is added by calling
    addInstruction() with line self.iCommentLine + iTagLine.  The tag
    counter of every current instruction is then incremented, and
    self.cTotalTagged is incremented for each instruction getting its
    first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        cTagsNow = oCurInstr.cOpTags + 1;
        oCurInstr.cOpTags = cTagsNow;
        if cTagsNow == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1866
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section (list of lines) becomes one string made up of
    its stripped lines joined by single spaces; empty sections are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
1878
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string, by default with
    newlines as section breaks.

    The lines of each section are stripped and joined using sLineSep, and
    the resulting section strings are joined using sSectionSep.  Empty
    sections are skipped entirely, so they contribute neither text nor a
    separator; in particular the result never starts with sSectionSep just
    because the first section happens to be empty.  The final section does
    not sport a trailing separator.

    Returns the flattened string, empty if there is nothing to flatten.
    """
    asFlatSections = [sLineSep.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
    return sSectionSep.join(asFlatSections);
1896
1897
1898
1899 ## @name Tag parsers
1900 ## @{
1901
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, terminated with a dot if it lacks one, and
    must be a single sentence of at most 180 characters.  On success it is
    stored in the sBrief attribute of the instruction; an already set
    brief is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for a sentence ending dot (one followed by a space) before the
    # final character.  Dots inside words and numbers are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1931
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are
    appended to the asDescSections list of the instruction.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);

    _ = sTag; _ = iEndLine;
    return True;
1946
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    An already set mnemonic is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));
    oInstr.sMnemonic = sNewMnemonic;

    _ = iEndLine;
    return True;
1969
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.
    When omitted, it is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    already specified operand is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        # Use the default location associated with the operand type.
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2019
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The value is a comma separated list of g_dInstructionMaps keys.
    Whitespace around the names and empty list entries are ignored.  The
    maps are appended to the aoMaps list of the instruction; assigning a
    map the instruction already is in is an error.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2054
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    Exactly one value is expected.  It is lower cased; 'n/a' is stored as
    None, and a two character value gets a '0x' prefix before being
    checked with _isValidOpcodeByte().  Anything but 'n/a' must be a key
    of g_kdPrefixes.  An already set prefix is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2092
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted are values passing _isValidOpcodeByte() as well as the forms
    '/<reg>', '11/<reg>' and '!11/<reg>', where <reg> is a single digit in
    the range 0 thru 7 (the ModR/M reg field is three bits wide).  An
    already set opcode is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sRegPrefix in ('/', '11/', '!11/'):
            if    len(sOpcode) == len(sRegPrefix) + 1 \
              and sOpcode.startswith(sRegPrefix) \
              and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2122
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes; the first element
    of the matching entry is what gets stored in the sSubOpcode attribute.
    An already set sub opcode is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub opcode table.
    sRawSubOpcode = self.flattenAllSections(aasSections);
    if sRawSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawSubOpcode,));
    sNewSubOpcode = g_kdSubOpcodes[sRawSubOpcode][0];

    # Store it unless there already is one.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;

    _ = iEndLine;
    return True;
2148
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The flattened value is accepted if it is a key of g_kdEncodings, a key
    of g_dInstructionMaps, or passes _isValidOpcodeByte().  An already set
    encoding is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    if     sNewEncoding not in g_kdEncodings \
       and sNewEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Store it unless there already is one.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;

    _ = iEndLine;
    return True;
2175
## Lookup table translating an EFLAGS related tag name into the name of the
## Instruction attribute holding the flag list for that tag.
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
]);
2184
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or the
    single word 'none' (any case) for an empty list.  The resulting list is
    stored in the instruction attribute that self.kdOpFlagToAttr associates
    with the tag.  An already set list is never overwritten.

    Returns True on success, False if any flag is invalid, otherwise the
    result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Set them.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
2216
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any
    case) gives an empty list.  The hints are added to the dHints
    dictionary of the instruction, with duplicates being reported but
    otherwise ignored.

    Returns True on success and False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! The words returned by split() never have surrounding whitespace,
    #       so there is no need for stripping them before the lookup.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2250
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The first word of the value must match self.oReDisEnum.  Extra words
    are reported but otherwise ignored.  An already set enum value is
    never overwritten.

    Returns True on success, False if the value is empty, otherwise the
    result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    if not asWords:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        return False;
    if len(asWords) > 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));

    # Validate the first word.
    sNewDisEnum = asWords[0];
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Store it unless there already is one.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));
    oInstr.sDisEnum = sNewDisEnum;

    _ = iEndLine;
    return True;
2279
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key of g_kdCpuNames.  Extra
    words are reported but otherwise ignored.  The name is stored in the
    sMinCpu attribute of the instruction, unless a different name has
    been set already, in which case an error is reported and the old name
    is kept.

    Returns True when the name is valid (also if it was not stored),
    otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Must not be done before this point, or the check for
    # conflicting values below would never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2309
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys;
    the single word 'none' (any case) gives an empty list.  The flags are
    appended to the asCpuIds list of the instruction, with duplicates
    being reported but otherwise ignored.

    Returns True on success and False if any flag is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! The words returned by split() never have surrounding whitespace,
    #       so there is no need for stripping them before the lookup.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2343
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word matching self.oReGroupName.  An
    already set group is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and check that there is just one valid name.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sNewGroup,) = asGroups;
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Store it unless there already is one.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oInstr.sGroup = sNewGroup;

    _ = iEndLine;
    return True;
2369
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be exactly one word and a key of g_kdInvalidStyles.  It
    is stored in the sInvalidStyle attribute, which is never overwritten.
    In addition \@opunused sets fUnused and \@opinvalid sets fInvalid.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and check that there is just one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Store it unless there already is one.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
2407
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as one test.  The words of a
    section are sorted into selectors, inputs and outputs using the '/'
    and '->' separators, validated against the TestSelector and TestInOut
    tables, and collected in an InstructionTest.  The test is appended to
    the aoTests list of the instruction only if the whole section parsed
    without errors.

    Always returns True, problems are reported via errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are processed back to front: outputs, '->', inputs, '/', selectors.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.
            # Note! The current list must be checked by identity.  Comparing
            #       by value would treat two different but equal lists (like
            #       two empty ones) as the same list and miss the error.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand for full '<variable><op><value>' conditions.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition is considered.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value has an optional ':<type>' suffix, the field supplies the default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is considered.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if everything parsed fine.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2552
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is extracted from the tag and compared with the number of
    tests the instruction already has.  A mismatch is reported, but the
    value is handed to parseTagOpTest() in either case.

    Returns whatever parseTagOpTest() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The digits start after '@optest[' (8 chars) or '@optest' (7 chars).
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2568
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  All
    the arguments are ignored and nothing is recorded.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2580
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid references are appended to the asCopyTests list of the
    instruction; invalid and duplicate ones are reported and skipped.

    Returns True unless the value is empty, in which case the result of
    errorComment() is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  The jobs
    # are only recorded here, no copying is done by this method.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
2609
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is added to the
    g_aoOnlyTestInstructions list unless it already is in it.

    See also \@optestignore.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag must not have any value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if len(sUnexpected) > 0:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Add the instruction to the only-test list, avoiding duplicates.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);

    _ = iEndLine;
    return True;
2632
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one word and a key of g_kdXcptTypes.  It is
    stored in the sXcptType attribute, which is never overwritten.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and check that there is just one valid type.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sNewType,) = asTypes;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Store it unless there already is one.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));
    oInstr.sXcptType = sNewType;

    _ = iEndLine;
    return True;
2659
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or it is
    generated from the mnemonic and operands.

    The tag is thought to be needed when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise whatever self.errorComment() returns.
    """
    _ = iEndLine
    oTaggedInstr = self.ensureInstructionForOpTag(iTagLine)

    # The flattened value must look like a function name.
    sName = self.flattenAllSections(aasSections)
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReFunctionName.pattern))

    # Never replace a name that has already been set.
    sPrevious = oTaggedInstr.sFunction
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,))

    oTaggedInstr.sFunction = sName
    return True
2687
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    The tag is thought to be needed when specifying instructions whose
    implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise whatever self.errorComment() returns.
    """
    _ = iEndLine
    oTaggedInstr = self.ensureInstructionForOpTag(iTagLine)

    # The flattened value must look like a statistics name.
    sName = self.flattenAllSections(aasSections)
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReStatsName.pattern))

    # Never replace a name that has already been set.
    sPrevious = oTaggedInstr.sStats
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,))

    oTaggedInstr.sStats = sName
    return True
2715
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions(), or that of
    self.errorComment() when the tag is followed by a value.
    """
    _ = iEndLine
    sValue = self.flattenAllSections(aasSections)
    if sValue == '':
        return self.doneInstructions()
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,))
2728
2729 ## @}
2730
2731
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    The comment is split into lines, leading asterisks and blanks are
    stripped, and the text is cut up into tags ('@xxx') each followed by
    one or more sections (paragraphs separated by empty lines).  Text that
    precedes the first tag is collected under the pseudo tag '@default'.
    Each tag is dispatched via self.dTagHandlers.

    Returns False if the comment contains neither 'Opcode' nor '@' (it is
    then ignored), True otherwise.
    """

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Dispatches one completed tag.  Used both for tags terminated by a
        following tag and for the final tag of the comment, so that both
        get the same diagnostics.

        Returns (fHandled, sDefaultText).  fHandled is True if a handler
        from self.dTagHandlers was invoked.  sDefaultText is the flattened
        untagged text when sTag is '@default', otherwise None.
        """
        # Drop a trailing empty section (the current section is always last).
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (True, None);

        # Must be checked before the generic '@op' test below, which would
        # otherwise swallow '@opencoding'.
        if sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            return (False, self.flattenAllSections(aasTagSections));
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        return (False, None);

    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while asLines and not asLines[0]:
        self.iCommentLine += 1;     # keep the comment line number in sync with asLines[0].
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # An empty line ends the current section and starts a new one.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            fHandled, sDefault = processTag(sCurTag, aasSections, iCurTagLine, iLine);
            if fHandled:
                cOpTags += 1;
            if sDefault is not None:
                sFlatDefault = sDefault;

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    fHandled, sDefault = processTag(sCurTag, aasSections, iCurTagLine, iLine);
    if fHandled:
        cOpTags += 1;
    if sDefault is not None:
        sFlatDefault = sDefault;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2832
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, further lines are appended from self.asLines,
    starting at index self.iLine (self.iLine itself is not modified).

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.

    All failures are reported via self.raiseError().  Callers unpack the
    returned tuple, so no error path may return anything else.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.raiseError("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more lines until there is a character at 'off' (a loop
        # rather than a single test in case an appended line is empty).
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.raiseError('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only separators at the outermost nesting level end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2874
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed (ignoring whitespace) by '('.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found
    (with the offset adjusted so it is relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # lstrip().startswith() rather than strip()[0]: the latter raises
    # IndexError when nothing but whitespace follows the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2884
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset and only hands back the argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tOffAndArgs = self.findAndParseMacroInvocationEx(sCode, sMacro)
    return tOffAndArgs[1]
2890
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    for which findAndParseMacroInvocation finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    asFound = None
    for sCandidate in asMacro:
        asFound = self.findAndParseMacroInvocation(sCode, sCandidate)
        if asFound is not None:
            break
    return asFound
2900
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are applied to the last instruction in
    self.aoCurInstrs (one is added if the list is empty): attributes that
    are still unset are filled in, attributes that are already set are
    compared with the macro arguments.  Problems are reported with
    self.error() and processing continues.

    Parameters:
        sMacro      Name of the macro, only used in error messages.
        sStats      The a_Stats argument (statistics name).
        sAsm        The a_szMnemonic argument; not used here.
        sForm       The a_Form argument, looked up in g_kdIemForms.
        sUpper      The a_Upper argument (upper case mnemonic).
        sLower      The a_Lower argument (lower case mnemonic).
        sDisHints   The a_fDisHints argument, '|' separated DISOPTYPE_XXX values or 0.
        sIemHints   The a_fIemHints argument, '|' separated IEMOPHINT_XXX values or 0.
        asOperands  List of operand type names (keys of g_kdOpTypes).

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # iLineMnemonicMacro == -1 means no IEMOP_MNEMONIC* macro has been seen for this instruction yet.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    # The instruction either has no operands yet or the same number as the macro.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry [1] of the g_kdOpTypes value is the operand location; None if the type is unknown.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        # Append the operand if it is the next missing one, otherwise compare with the existing one.
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    # g_kdIemForms[sForm] is indexed with [0] for the encoding and [1] for the operand locations (or None).
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The prefix is stripped and the rest lower-cased to form the g_kdHints key.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));


    _ = sAsm;
    return True;
3018
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Handles the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    the work is handed on to workerIemOpMnemonicEx.

    Returns whatever workerIemOpMnemonicEx returns.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands)
        sAsm = sLower + ' ' + ','.join(asOperands)
    else:
        # Without operands the bare mnemonic serves as both.
        sStats = sLower
        sAsm = sLower
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands)
3028
def checkCodeForMacro(self, sCode):
    """
    Scans a piece of code for the macro invocations we care about.

    Returns True if a FNIEMOP_XXX function definition macro was found and
    processed, False in all other cases (including when IEMOP_MNEMONIC*
    macros were processed).
    """
    # Without an opening parenthesis past the first column there cannot be
    # any macro invocation in here.
    if sCode.find('(') <= 0:
        return False

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asFnMacros = ['FNIEMOP_DEF', 'FNIEMOP_STUB', 'FNIEMOP_STUB_1', 'FNIEMOP_UD_STUB', 'FNIEMOP_UD_STUB_1']
    asArgs = self.findAndParseFirstMacroInvocation(sCode, asFnMacros)
    if asArgs is not None:
        sFunction = asArgs[1]
        sFnMacro = asArgs[0]

        if not self.aoCurInstrs:
            self.addInstruction()
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sFnMacro, oInstr,) )
            else:
                oInstr.iLineFnIemOpMacro = self.iLine

        fIsStub = sFnMacro.find('STUB') > 0
        self.setInstrunctionAttrib('sFunction', sFunction)
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True)
        self.setInstrunctionAttrib('fUdStub', sFnMacro.find('UD_STUB') > 0, fOverwrite = True)
        # A stub has no body that could contain more information, so finish up right away.
        if fIsStub:
            self.doneInstructions()
        return True

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC')
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0]
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1]
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1])

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1 .. a_OpN, a_fDisHints, a_fIemHints)
    # for N = 0..4.  The N operands sit between a_Lower and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,))
        if asArgs is not None:
            iHints = 6 + cOperands
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints])

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, a_Op1 .. a_OpN, a_fDisHints, a_fIemHints) for N = 0..4.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,))
        if asArgs is not None:
            iHints = 4 + cOperands
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints])

    return False
3122
3123
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, switching between the code
    state (self.kiCode) and the multi-line comment state
    (self.kiCommentMulti).  Code is handed to checkCodeForMacro(), the text
    of each completed /* ... */ comment to parseComment().

    Returns the result of self.printErrors() (the number of errors,
    going by the original description of this method).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;    # From here on self.iLine is the 1-based number of sLine.

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Scan the whole line unless we are in code and the first slash starts a '//' comment.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then switch to collecting the comment.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: parse it and continue with the rest of the line as code.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Flush whatever is still pending at the end of the file.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3190
3191
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Reads one source file and runs SimpleParser over it, looking for
    instruction specifications.

    Returns whatever SimpleParser.parse() returns.
    Raises Exception if the file cannot be opened or read.  A
    ParserException from the parser is printed and then passed on, any
    other exception is passed on as is.
    """
    # Load the whole file as a list of lines; the file is closed again
    # regardless of whether reading succeeds.
    try:
        oFile = open(sSrcFile, "r")
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,))
    with oFile:
        try:
            asLines = oFile.readlines()
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,))

    # Hand the lines to the parser.
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse()
    except ParserException as oXcpt:
        print(str(oXcpt))
        raise
3222
3223
def __doTestCopying():
    """
    Carries out the test copying requested via the asCopyTests lists.

    Each entry is looked up first as a statistics name and, failing that,
    as a function name.  The tests of every instruction found are appended
    to the requesting instruction.  Problems are written to stderr.

    Returns the number of errors.
    """
    asErrors = []
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue
        for sSrcInstr in oDstInstr.asCopyTests:
            # A statistics name identifies one instruction, a function name a list of them.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None)
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, [])
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))
                continue

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests)
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))

    cErrors = len(asErrors)
    if cErrors:
        sys.stderr.write(u''.join(asErrors))
    return cErrors
3251
3252
def __applyOnlyTest():
    """
    When g_aoOnlyTestInstructions is not empty, clears aoTests of every
    instruction that is not in that list, so that only the listed
    instructions get tested.

    Returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = []
    return 0
3264
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files located next to this
    script, then does the test copying and the only-test filtering.

    Returns True.  Exits the process with status 1 if any errors were
    counted.  Exceptions from the parsing are passed on.
    """
    # (default map, file name) pairs, processed in this order.
    atSources = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    )
    sSrcDir = os.path.dirname(os.path.abspath(__file__))

    cErrors = 0
    for sDefaultMap, sName in atSources:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap)
    cErrors += __doTestCopying()
    cErrors += __applyOnlyTest()

    if cErrors != 0:
        sys.exit(1)
    return True
3291
3292
3293
# Module-level call: all the instruction files are parsed as soon as this
# module is loaded, whether it is imported or run as a script.
__parseAll();
3295
3296
3297#
3298# Generators (may perhaps move later).
3299#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are visited in the order of
    their encoding followed by their lead opcodes.  For a map a C array of
    DISOPCODE entries is produced, one OP()/OPVEX() line per instruction,
    and written to oDstFile.  Only the first map is currently written since
    the loop is left after it ('for now').

    NOTE(review): the oDstFile default is bound to the sys.stdout object
    that exists when this function is defined, so redirecting sys.stdout
    afterwards does not affect the default.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Offsets at which the individual columns of a table line start (see the formatting loop below).
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Every 16 entries: a separator line (except before the first) and a row comment.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # OP takes up to three operands, OPVEX is used when there are four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the decoder columns begin so that they can be counted below.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns with zeros up to cMaxOperands entries.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the operand columns with OP_PARM_NONE up to cMaxOperands entries.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints whose g_kdHints define starts with DISOPTYPE_ are included, OR'ed together.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded out to its offset; if the line is already past it a single
                # separator is used instead.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3463
# When run as a script, generate the disassembler tables using the default
# destination of generateDisassemblerTables().
if __name__ == '__main__':
    generateDisassemblerTables();
3466
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette