IEMAllInstructionsPython.py@ 65828

Last change on this file since 65828 was 65828, checked in by vboxsync, 8 years ago
python 3 fixes.
Property svn:eol-style set to `LF` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 76.0 KB

Line
1	#!/usr/bin/env python
2	# -- coding: utf-8 --
3	# $Id: IEMAllInstructionsPython.py 65828 2017-02-21 09:55:39Z vboxsync $
4
5	"""
6	IEM instruction extractor.
7
8	This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9	collects information about the instructions. It can then be used to generate
10	disassembler tables and tests.
11	"""
12
13	__copyright__ = \
14	"""
15	Copyright (C) 2017 Oracle Corporation
16
17	This file is part of VirtualBox Open Source Edition (OSE), as
18	available from http://www.virtualbox.org. This file is free software;
19	you can redistribute it and/or modify it under the terms of the GNU
20	General Public License (GPL) as published by the Free Software
21	Foundation, in version 2 as it comes in the "COPYING" file of the
22	VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23	hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25	The contents of this file may alternatively be used under the terms
26	of the Common Development and Distribution License Version 1.0
27	(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28	VirtualBox OSE distribution, in which case the provisions of the
29	CDDL are applicable instead of those of the GPL.
30
31	You may elect to license modified versions of this file under the
32	terms and conditions of either the GPL or the CDDL or both.
33	"""
34	__version__ = "$Revision: 65828 $"
35
36	# Standard python imports.
37	import os
38	import re
39	import sys
40
# Only the main script needs to modify the path.
# The ValidationKit directory is located two levels above the directory
# holding this file; it is appended to sys.path so that the 'common'
# package imported below can be found.
g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
                                    'ValidationKit');
sys.path.append(g_ksValidationKitDir);
45
46	from common import utils;
47
# Python 3 hacks:
# Alias 'long' to int when running under Python 3 so that the long(...)
# conversions used further down work with both major versions.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
51
52
53	# Annotation example:
54	#
55	# \@opmnemonic add
56	# \@op1 reg:Eb
57	# \@op2 rm:Gb
58	# \@opmaps onebyte
59	# \@oppfx none
60	# \@opcode 0x00
61	# \@openc ModR/M
62	# \@opfltest none
63	# \@opflmodify of,sz,zf,af,pf,cf
64	# \@opflundef none
65	# \@opflset none
66	# \@opflclear none
67	# \@ophints harmless
68	# \@opstats add_Eb_Gb
69	# \@opgroup op_gen_arith_bin
70	# \@optest in1=1 in2=1 -> out1=2 outfl=a?,p?
71	# \@optest oppfx:o32 in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
72
73
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode is an opcode byte on the form '0xNN', where both
    digits are lower case hexadecimal.
    Returns True if it is, False if it isn't.
    """
    sHexDigits = '0123456789abcdef';
    return     len(sOpcode) == 4 \
           and sOpcode[:2] == '0x' \
           and sOpcode[2] in sHexDigits \
           and sOpcode[3] in sHexDigits;
85
86
class InstructionMap(object):
    """
    Instruction map.

    A map is identified by its lead opcode bytes (none for the one byte
    opcode map).  One instruction may be listed in several maps provided it
    has the same opcode value in each of them (because of VEX).
    """

    ## Valid encodings (the values are unused placeholders).
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Valid member selectors (the values are unused placeholders).
    kdSelectors = {
        'byte':     [], ##< next opcode byte selects the instruction (default).
        '/r':       [], ##< modrm.reg selects the instruction.
        'mod /r':   [], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy'):
        # Sanity check the input before storing anything.
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        for sLeadByte in asLeadOpcodes:
            assert _isValidOpcodeByte(sLeadByte);

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
128
129
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    def get(self, sValue):
        """
        Get the shortest byte representation of sValue.

        Returns (fSignExtend, bytearray), the byte array being in little
        endian order (least significant byte first).
        Raises BadValue if invalid value.

        The returned byte array is a reasonable size, e.g. for an integer type
        it's for instance 1, 2, 4, or 8 byte in size but never 3, 5 or 7 bytes.
        Negative values are returned in two's complement form.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  (int() yields a long on python 2 when
        # the value requires it, so no separate long() conversion is needed.)
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except ValueError:
            raise TestType.BadValue('failed to convert "%s" to integer' % (sValue,));

        # Convert to a hex string of a decent width.  The '%x' formatting is
        # used rather than hex() as it produces neither '0x' nor 'L' decoration.
        if iValue >= 0:
            sHex    = '%x' % (iValue,);
            cDigits = len(sHex);
            if cDigits <= 2:
                cDigits = 2;
            elif cDigits <= 4:
                cDigits = 4;
            elif cDigits <= 8:
                cDigits = 8;
            else:
                cDigits = (cDigits + 15) & ~15;
            sHex = sHex.rjust(cDigits, '0');
        else:
            # Find the smallest decent width the value fits into as a signed
            # number and convert it to the two's complement of that width.
            cDigits = 2;
            while iValue < -(1 << (cDigits * 4 - 1)):
                cDigits = cDigits * 2 if cDigits < 16 else cDigits + 16;
            sHex = '%x' % (iValue + (1 << (cDigits * 4)),);

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return (fSignExtend, abValue);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;
214
215
216
class TestTypeEflags(TestType):
    """
    Value type for EFLAGS/RFLAGS/FLAGS, which need special value parsing.
    """

    def __init__(self, sName):
        super(TestTypeEflags, self).__init__(sName, fUnsigned = True);

    def get(self, sValue):
        # No flag value parsing is performed here; None is returned for any input.
        return None;
228
229
230
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Instances just record the field name, the assignment operator, the raw
    value string and the type name handed to the constructor.
    """
    ## Assigned operators.
    ## Note! The or-assignment is spelled '|=' without any backslash, as an
    ##       escaped pipe would make the entry a three character string that
    ##       never matches the operator.
    kasOperators = [
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' uses the TestType default of fUnsigned=True, making
    #               it behave just like 'uint' - confirm whether that is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:     ( default type, tbd, )
        # Operands.
        'op1':      ( 'uint', '', ), ## \@op1
        'op2':      ( 'uint', '', ), ## \@op2
        'op3':      ( 'uint', '', ), ## \@op3
        'op4':      ( 'uint', '', ), ## \@op4
        # Flags.
        'efl':      ( 'efl',  '', ),
        # 8-bit GPRs.
        'al':       ( 'uint', '', ),
        'cl':       ( 'uint', '', ),
        'dl':       ( 'uint', '', ),
        'bl':       ( 'uint', '', ),
        'ah':       ( 'uint', '', ),
        'ch':       ( 'uint', '', ),
        'dh':       ( 'uint', '', ),
        'bh':       ( 'uint', '', ),
        'r8l':      ( 'uint', '', ),
        'r9l':      ( 'uint', '', ),
        'r10l':     ( 'uint', '', ),
        'r11l':     ( 'uint', '', ),
        'r12l':     ( 'uint', '', ),
        'r13l':     ( 'uint', '', ),
        'r14l':     ( 'uint', '', ),
        'r15l':     ( 'uint', '', ),
        # 16-bit GPRs.
        'ax':       ( 'uint', '', ),
        'dx':       ( 'uint', '', ),
        'cx':       ( 'uint', '', ),
        'bx':       ( 'uint', '', ),
        'sp':       ( 'uint', '', ),
        'bp':       ( 'uint', '', ),
        'si':       ( 'uint', '', ),
        'di':       ( 'uint', '', ),
        'r8w':      ( 'uint', '', ),
        'r9w':      ( 'uint', '', ),
        'r10w':     ( 'uint', '', ),
        'r11w':     ( 'uint', '', ),
        'r12w':     ( 'uint', '', ),
        'r13w':     ( 'uint', '', ),
        'r14w':     ( 'uint', '', ),
        'r15w':     ( 'uint', '', ),
        # 32-bit GPRs.
        'eax':      ( 'uint', '', ),
        'edx':      ( 'uint', '', ),
        'ecx':      ( 'uint', '', ),
        'ebx':      ( 'uint', '', ),
        'esp':      ( 'uint', '', ),
        'ebp':      ( 'uint', '', ),
        'esi':      ( 'uint', '', ),
        'edi':      ( 'uint', '', ),
        'r8d':      ( 'uint', '', ),
        'r9d':      ( 'uint', '', ),
        'r10d':     ( 'uint', '', ),
        'r11d':     ( 'uint', '', ),
        'r12d':     ( 'uint', '', ),
        'r13d':     ( 'uint', '', ),
        'r14d':     ( 'uint', '', ),
        'r15d':     ( 'uint', '', ),
        # 64-bit GPRs.
        'rax':      ( 'uint', '', ),
        'rdx':      ( 'uint', '', ),
        'rcx':      ( 'uint', '', ),
        'rbx':      ( 'uint', '', ),
        'rsp':      ( 'uint', '', ),
        'rbp':      ( 'uint', '', ),
        'rsi':      ( 'uint', '', ),
        'rdi':      ( 'uint', '', ),
        'r8':       ( 'uint', '', ),
        'r9':       ( 'uint', '', ),
        'r10':      ( 'uint', '', ),
        'r11':      ( 'uint', '', ),
        'r12':      ( 'uint', '', ),
        'r13':      ( 'uint', '', ),
        'r14':      ( 'uint', '', ),
        'r15':      ( 'uint', '', ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':   ( 'uint', '', ),
        'oz.rdx':   ( 'uint', '', ),
        'oz.rcx':   ( 'uint', '', ),
        'oz.rbx':   ( 'uint', '', ),
        'oz.rsp':   ( 'uint', '', ),
        'oz.rbp':   ( 'uint', '', ),
        'oz.rsi':   ( 'uint', '', ),
        'oz.rdi':   ( 'uint', '', ),
        'oz.r8':    ( 'uint', '', ),
        'oz.r9':    ( 'uint', '', ),
        'oz.r10':   ( 'uint', '', ),
        'oz.r11':   ( 'uint', '', ),
        'oz.r12':   ( 'uint', '', ),
        'oz.r13':   ( 'uint', '', ),
        'oz.r14':   ( 'uint', '', ),
        'oz.r15':   ( 'uint', '', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;   ##< Key into kdFields.
        self.sOp    = sOp;      ##< One of kasOperators.
        self.sValue = sValue;   ##< The value string, stored as given.
        self.sType  = sType;    ##< The type name, stored as given (not validated here).
355
356
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a 'variable op value' expression, where the variable and
    its value must be listed in kdVariables and the operator in kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        # The value must be one of those valid for the given variable.
        assert sValue in self.kdVariables[sVariable];
        self.sVariable  = sVariable;
        self.sOp        = sOp;
        self.sValue     = sValue;
433
434
class InstructionTest(object):
    """
    A test for an instruction: the owning instruction plus (initially empty)
    lists of inputs, outputs and selectors.
    """

    def __init__(self, oInstr): # type: (InstructionTest, Instruction)
        self.aoSelectors = [];      # type: list(TestSelector)
        self.aoOutputs   = [];
        self.aoInputs    = [];
        self.oInstr      = oInstr;  ##< The instruction object given to the constructor.
445
446
class Operand(object):
    """
    Instruction operand, described by where it is found and what type it is.
    """

    ## \@op[1-4] locations
    kdLocations = {
        'reg':  [], ## modrm.reg
        'rm':   [], ## modrm.rm
    };

    ## \@op[1-4] types
    kdTypes = {
        'Eb':   [],
        'Gb':   [],
    };

    def __init__(self, sWhere, sType):
        # Both values must be known ones.
        assert sWhere in self.kdLocations;
        assert sType  in self.kdTypes;
        self.sType  = sType;  ##< kdTypes
        self.sWhere = sWhere; ##< kdLocations
469
470
class Instruction(object):
    """
    Instruction.

    A plain record; the constructor sets every attribute to its initial
    value and only the source file and creation line come from the caller.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        ## @}

        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 0x66, 0xf3, 0xf2
        self.sOpcode        = None;
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.oCpus          = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        ## @}
526
527
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## Instruction maps, keyed by map name.
## Note! Each key may only be listed once; a repeated key silently replaces
##       the earlier entry.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 (0x80 thru 0x83) members are all selected by modrm.reg.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',]),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
591
592
593
class ParserException(Exception):
    """ Exception raised by the parser; carries the error message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
598
599
600	class SimpleParser(object):
601	"""
602	Parser of IEMAllInstruction*.cpp.h instruction specifications.
603	"""
604
605	## @name Parser state.
606	## @{
607	kiCode = 0;
608	kiCommentMulti = 1;
609	## @}
610
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the name used in messages, asLines the lines of the file
    and sDefaultMap the name of the g_dInstructionMaps entry to use as
    the default map.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.asCurInstr     = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    # 'op_' + one name part, optionally followed by one or two more parts
    # (like 'op_gen_arith_bin').  The pipes are plain alternation here;
    # escaping them would demand literal '|' characters in the name.
    self.oReGroupName   = re.compile(r'^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.fDebug         = True;

    # Maps each supported tag to the method parsing it.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };

    self.asErrors = [];
664
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number.
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
670
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,);
    raise ParserException(sFullMsg);
676
677	def error(self, sMessage):
678	"""
679	Adds an error.
680	returns False;
681	"""
682	self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
683	return False;
684
685	def errorComment(self, iLineInComment, sMessage):
686	"""
687	Adds a comment error.
688	returns False;
689	"""
690	self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
691	return False;
692
693	def printErrors(self):
694	"""
695	Print the errors to stderr.
696	Returns number of errors.
697	"""
698	if len(self.asErrors) > 0:
699	sys.stderr.write(u''.join(self.asErrors));
700	return len(self.asErrors);
701
702	def debug(self, sMessage):
703	"""
704	"""
705	if self.fDebug:
706	print('debug: %s' % (sMessage,));
707
708
def addInstruction(self, iLine = None):
    """
    Creates a new instruction for the given line (the current line when
    iLine is None) and adds it to both the list of current instructions
    and the global instruction list.
    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.asCurInstr.append(oNewInstr);
    return oNewInstr;
717
718	def doneInstructionOne(self, oInstr, iLine):
719	"""
720	Complete the parsing by processing, validating and expanding raw inputs.
721	"""
722	assert oInstr.iLineCompleted is None;
723	oInstr.iLineCompleted = iLine;
724
725	#
726	# Specified instructions.
727	#
728	if oInstr.cOpTags > 0:
729	if oInstr.sStats is None:
730	pass;
731
732	#
733	# Unspecified legacy stuff. We generally only got a few things to go on here.
734	# /** Opcode 0x0f 0x00 /0. */
735	# FNIEMOPRM_DEF(iemOp_Grp6_sldt)
736	#
737	else:
738	#if oInstr.sRawOldOpcodes:
739	#
740	#if oInstr.sMnemonic:
741	pass;
742
743	#
744	# Apply default map and then add the instruction to all it's groups.
745	#
746	if len(oInstr.aoMaps) == 0:
747	oInstr.aoMaps = [ self.oDefaultMap, ];
748	for oMap in oInstr.aoMaps:
749	oMap.aoInstructions.append(oInstr);
750
751	self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
752	return True;
753
754	def doneInstructions(self, iLineInComment = None):
755	"""
756	Done with current instruction.
757	"""
758	for oInstr in self.asCurInstr:
759	self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
760	if oInstr.fStub:
761	self.cTotalStubs += 1;
762
763	self.cTotalInstr += len(self.asCurInstr);
764
765	self.sComment = '';
766	self.asCurInstr = [];
767	return True;
768
769	def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
770	"""
771	Sets the sAttrib of all current instruction to oValue. If fOverwrite
772	is False, only None values and empty strings are replaced.
773	"""
774	for oInstr in self.asCurInstr:
775	if fOverwrite is not True:
776	oOldValue = getattr(oInstr, sAttrib);
777	if oOldValue is not None:
778	continue;
779	setattr(oInstr, sAttrib, oValue);
780
781	def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
782	"""
783	Sets the iEntry of the array sAttrib of all current instruction to oValue.
784	If fOverwrite is False, only None values and empty strings are replaced.
785	"""
786	for oInstr in self.asCurInstr:
787	aoArray = getattr(oInstr, sAttrib);
788	while len(aoArray) <= iEntry:
789	aoArray.append(None);
790	if fOverwrite is True or aoArray[iEntry] is None:
791	aoArray[iEntry] = oValue;
792
793	def parseCommentOldOpcode(self, asLines):
794	""" Deals with 'Opcode 0xff /4' like comments """
795	asWords = asLines[0].split();
796	if len(asWords) >= 2 \
797	and asWords[0] == 'Opcode' \
798	and ( asWords[1].startswith('0x')
799	or asWords[1].startswith('0X')):
800	asWords = asWords[:1];
801	for iWord, sWord in enumerate(asWords):
802	if sWord.startswith('0X'):
803	sWord = '0x' + sWord[:2];
804	asWords[iWord] = asWords;
805	self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
806
807	return False;
808
809	def ensureInstructionForOpTag(self, iTagLine):
810	""" Ensure there is an instruction for the op-tag being parsed. """
811	if len(self.asCurInstr) == 0:
812	self.addInstruction(self.iCommentLine + iTagLine);
813	for oInstr in self.asCurInstr:
814	oInstr.cOpTags += 1;
815	if oInstr.cOpTags == 1:
816	self.cTotalTagged += 1;
817	return self.asCurInstr[-1];
818
819	@staticmethod
820	def flattenSections(aasSections):
821	"""
822	Flattens multiline sections into stripped single strings.
823	Returns list of strings, on section per string.
824	"""
825	asRet = [];
826	for asLines in assSections:
827	if len(asLines) > 0:
828	asRet.append(' '.join([sLine.strip() for sLine in asLines]));
829	return asRet;
830
831	@staticmethod
832	def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
833	"""
834	Flattens sections into a simple stripped string with newlines as
835	section breaks. The final section does not sport a trailing newline.
836	"""
837	# Typical: One section with a single line.
838	if len(aasSections) == 1 and len(aasSections[0]) == 1:
839	return aasSections[0][0].strip();
840
841	sRet = '';
842	for iSection, asLines in enumerate(aasSections):
843	if len(asLines) > 0:
844	if iSection > 0:
845	sRet += sSectionSep;
846	sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
847	return sRet;
848
849
850
851	## @name Tag parsers
852	## @{
853
854	def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
855	"""
856	Tag: \@opbrief
857	Value: Text description, multiple sections, appended.
858
859	Brief description. If not given, it's the first sentence from @opdesc.
860	"""
861	oInstr = self.ensureInstructionForOpTag(iTagLine);
862
863	# Flatten and validate the value.
864	sBrief = self.flattenAllSections(aasSections);
865	if len(sBrief) == 0:
866	return self.errorComment(iTagLine, '%s: value required' % (sTag,));
867	if sBrief[-1] != '.':
868	sBrief = sBrief + '.';
869	if len(sBrief) > 180:
870	return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
871	offDot = sBrief.find('.');
872	while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
873	offDot = sBrief.find('.', offDot + 1);
874	if offDot >= 0 and offDot != len(sBrief) - 1:
875	return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
876
877	# Update the instruction.
878	if oInstr.sBrief is not None:
879	return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
880	% (sTag, oInstr.sBrief, sBrief,));
881	_ = iEndLine;
882	return True;
883
884	def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
885	"""
886	Tag: \@opdesc
887	Value: Text description, multiple sections, appended.
888
889	It is used to describe instructions.
890	"""
891	oInstr = self.ensureInstructionForOpTag(iTagLine);
892	if len(self.aoInstructions) > 0 and len(aasSections) > 0:
893	oInstr.asDescSections.extend(self.flattenSections(aasSections));
894	return True;
895
896	_ = sTag; _ = iEndLine;
897	return True;
898
899	def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
900	"""
901	Tag: @opmenmonic
902	Value: mnemonic
903
904	The 'mnemonic' value must be a valid C identifier string. Because of
905	prefixes, groups and whatnot, there times when the mnemonic isn't that
906	of an actual assembler mnemonic.
907	"""
908	oInstr = self.ensureInstructionForOpTag(iTagLine);
909
910	# Flatten and validate the value.
911	sMnemonic = self.flattenAllSections(aasSections);
912	if not self.oReMnemonic.match(sMnemonic):
913	return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
914	if oInstr.sMnemonic is not None:
915	return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
916	% (sTag, oInstr.sMnemonic, sMnemonic,));
917	oInstr.sMnemonic = sMnemonic
918
919	_ = iEndLine;
920	return True;
921
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  where:type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It is validated against Operand.kdLocations.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It is
    validated against Operand.kdTypes.

    The operand index is taken from the last character of the tag name.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp  = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit    = sFlattened.split(':');
    if len(asSplit) != 2:
        return self.errorComment(iTagLine, 'expected %s value on format "<where>:<type>" not "%s"' % (sTag, sFlattened,));

    (sWhere, sType) = asSplit;
    if sWhere not in Operand.kdLocations:
        # Note: errorComment is called with (line, message) everywhere else in
        # this code; the stray third argument the original passed here is dropped.
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                 % (sTag, sWhere, ', '.join(Operand.kdLocations.keys()),));

    if sType not in Operand.kdTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                 % (sTag, sType, ', '.join(Operand.kdTypes.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  The list is
    # padded with None so operands may be specified in any order.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                 % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                     sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
966
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key in g_dInstructionMaps and may not already be
    assigned to the instruction.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so an empty value must be detected by
    # looking at the entries themselves.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps     = [sMap.strip() for sMap in sFlattened.split(',')];
    if not any(asMaps):
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                     % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this tag value: reported, but not fatal.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1001
## \@oppfx values.
## Keys are the accepted prefix byte strings; each maps to its own empty list.
kdPrefixes = dict((sPrefixByte, []) for sPrefixByte in ('0x66', '0xf3', '0xf2',));
1008
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    A two character value is taken as a hex byte without the '0x' prefix.
    The value 'none' is stored as None.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value: exactly one word is required.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
    if not asPrefixes:
        # Avoids indexing into an empty list below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # The value must look like an opcode byte; anything else is rejected.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in self.kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, self.kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1044
## Special \@opcode tag values.
## These are accepted in place of a plain opcode byte; each maps to its own empty list.
kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
1054
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    Returns True when the value was stored, otherwise whatever
    self.errorComment returns.
    """
    oInstr  = self.ensureInstructionForOpTag(iTagLine);
    sOpcode = self.flattenAllSections(aasSections);

    # Either one of the special values or something passing the opcode byte check.
    fSpecial = sOpcode in self.kdSpecialOpcodes;
    if not fSpecial and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Refuse to replace a value that has already been set.
    if oInstr.sOpcode is None:
        oInstr.sOpcode = sOpcode;
        _ = iEndLine;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
1078
## Valid values for \@openc
## Each accepted encoding name maps to its own empty list.
kdEncodings = dict((sEncodingName, []) for sEncodingName in ('ModR/M',));
1083
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|TBD

    The instruction operand encoding style.  Only the values listed in
    self.kdEncodings are accepted.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  The original code fell back on the
    # opcode byte check here, which let values like '0x0f' through as an
    # encoding name; only the known encodings are accepted now.
    sEncoding = self.flattenAllSections(aasSections);
    if sEncoding not in self.kdEncodings:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                 % ( sTag, oInstr.sEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
1108
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## A leading '!' in the value marks the flag as being clear rather than set.
kdEFlags = {
    # Debugger flag notation:
    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    # PF is set when the low result byte has an even number of set bits, so
    # 'pe' is the flag-set form and 'po' the flag-clear one.
    'pe':   'X86_EFL_PF',   ##< Parity Even.
    'po':   '!X86_EFL_PF',  ##< Parity Odd.

    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'tf':   'X86_EFL_TF',   ##< Trap flag.

    # Reference manual notation ('af' and 'cf' are already listed above):
    'of':   'X86_EFL_OF',
    'sf':   'X86_EFL_SF',
    'zf':   'X86_EFL_ZF',
    'pf':   'X86_EFL_PF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
1155
## EFlags tag to Instruction attribute name.
## Used to pick the instruction attribute an EFLAGS tag value is stored in.
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
]);
1164
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from self.kdEFlags, or the
    single word 'none' for an empty list.  The resulting list is stored in
    the instruction attribute self.kdOpFlagToAttr gives for the tag.

    Returns True on success.  Returns False when one or more flag names are
    invalid (each one is reported), and whatever self.errorComment returns
    when the attribute has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # One flag per list entry, lines and sections alike separated by commas.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # Normalize surrounding whitespace and report every unknown name.
        fRc = True;
        for iFlag in range(len(asFlags)):
            sRaw      = asFlags[iFlag];
            sStripped = sRaw.strip();
            if sStripped in self.kdEFlags:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRaw,));
        if not fRc:
            return False;

    # Store the list, refusing to replace an earlier one.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
1196
## \@ophints values.
## Maps each hint keyword accepted by the tag to a DISOPTYPE_XXX constant name.
kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode.
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode.
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size.
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes.
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte.
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b.
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes (only in 16 & 32 bits mode!).
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
};
1228
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.  Valid names are the
    keys of self.kdHints; the single word 'none' gives an empty list.

    Returns True on success (duplicates are reported but not fatal) and
    False when one or more hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # str.split() without arguments already yields whitespace-free words, so
    # no further stripping is needed (the original's strip branch could never
    # be taken and would have failed assigning into a string).
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in self.kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1262
## \@opcpuid
## Maps each CPUID feature keyword to an X86_CPUID_XXX feature constant name.
kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the next three previously named the neighbouring feature
    # bits (DTES64, CPLDS, TM2); confirm the constant names against x86.h.
    'pclmul':   'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':  'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
1306
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.
    Valid names are the keys of self.kdCpuIdFlags; the single word 'none'
    gives an empty list.

    Returns True on success (duplicates are reported but not fatal) and
    False when one or more values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # str.split() without arguments already yields whitespace-free words, so
    # no further stripping is needed.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in self.kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1340
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName is expected.

    Returns True when the group was stored, otherwise whatever
    self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value must consist of a single word ...
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));

    # ... which must look like a group name.
    sGroup = asWords[0];
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                 % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace a group that has already been set.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        _ = iEndLine;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
1366
## \@opunused, \@opinvalid, \@opinvlstyle
## Each accepted style name maps to its own empty list.
kdInvalidStyles = dict((sStyleName, []) for sStyleName in (
    'immediate',                ##< CPU stops decoding immediately after the opcode.
    'intel-modrm',              ##< Intel decodes ModR/M.
    'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
));
1375
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Valid styles are the keys of self.kdInvalidStyles.  Only one of these
    three tags may be given per instruction.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behaviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in self.kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                 % (sTag, sStyle, ', '.join(self.kdInvalidStyles.keys()),));

    # Set it.
    if oInstr.sInvlStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvlStyle, sStyle,));
    oInstr.sInvlStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
1413
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as one test.  Tests that parse cleanly are
    appended to the instruction's aoTests list; problems are reported via
    self.errorComment.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if len(sFlatSection) == 0:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # after seeing '->', then selectors after seeing '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list is compared by
            # identity; comparing by value would treat two lists with equal
            # content (e.g. both still empty) as the same list.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;  # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            # The type is optional; the first entry of the
                            # field's kdFields value is used when omitted.
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s)'
                                                      % ( sTag, sDesc, sValue, sItem, sType, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                  % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                              % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;  # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse %s: %s' % ( sTag, sDesc, sItem,));

        #
        # Only keep the test if everything parsed fine.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
1548
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReFunctionName.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sFunction, self.oReFunctionName.pattern));

    # Refuse to overwrite a name that has already been set.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;

    _ = iEndLine;
    return True;
1576
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReStatsName.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sStats, self.oReStatsName.pattern));

    # Refuse to overwrite a name that has already been set.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;

    _ = iEndLine;
    return True;
1604
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when the tag has no value,
    otherwise whatever self.errorComment returns.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
1617
1618	## @}
1619
1620
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    The comment is split into lines, each tag (a line starting with '@') and
    the text following it is collected into sections, and the handler
    registered for the tag in self.dTagHandlers is called with
    (sTag, aasSections, iTagLine, iEndLine).

    Returns False if the comment contains neither 'Opcode' nor '@',
    otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while len(asLines) > 0 and len(asLines[0]) == 0:
        self.iCommentLine += 1;
        asLines.pop(0);

    while len(asLines) > 0 and len(asLines[-1]) == 0:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if len(asLines) > 0 and asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    # A tag is only processed once the start of the next one is seen, so a
    # sentinel '@@' line is appended to get the last tag of the comment
    # processed too.  (The sentinel tag itself is never processed.)
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines + ['@@',]):
        if not sLine.startswith('@'):
            if len(sLine) > 0:
                asCurSection.append(sLine);
            elif len(asCurSection) != 0:
                # An empty line ends the current section.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault is not None and len(sFlatDefault) > 0:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
1702
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.

    If the closing parenthesis isn't found in sInvocation, following lines
    from self.asLines (starting at self.iLine) are appended until it is.
    Parentheses inside string or character literals are not treated
    specially.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): the original called a bare raiseError(); this assumes
        # the parser class provides raiseError() as a method - confirm.
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # NOTE(review): callers unpack the result as a 2-tuple; confirm what
        # self.error returns here.
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.  Only commas at nesting depth one separate arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        if off >= len(sInvocation):
            # Ran out of text, continue with the next source line.
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
1744
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    The macro name only counts as found when the first non-blank character
    following it is an opening parenthesis.  Only the first occurrence of
    the name in sCode is considered.
    """
    offHit = sCode.find(sMacro);
    # startswith() rather than indexing, as nothing may follow the name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
1754
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    The macro names in asMacro are tried in order and the first one that
    findAndParseMacroInvocation finds an invocation of wins.
    """
    for sCandidate in asMacro:
        (offEnd, asArgs) = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return (offEnd, asArgs);
    return (len(sCode), None);
1764
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if sCode holds one of the FNIEMOP_XXX decoder function
    definition macros, otherwise False.
    """
    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        (_, asArgs) = self.findAndParseFirstMacroInvocation(sCode,
                                                            [ 'FNIEMOP_DEF',
                                                              'FNIEMOP_STUB',
                                                              'FNIEMOP_STUB_1',
                                                              'FNIEMOP_UD_STUB',
                                                              'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            # asArgs[0] is the macro name, asArgs[1] its first argument.
            sFunction = asArgs[1];

            # No instruction being specified at the moment: add one, taking
            # the mnemonic from the second underscore separated part of the
            # function name.
            if len(self.asCurInstr) == 0:
                self.addInstruction().sMnemonic = sFunction.split('_')[1];
            self.setInstrunctionAttrib('sFunction', sFunction);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # The stub macros are followed by a doneInstructions() call right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions();
            return True;

        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            # Only applied when exactly one instruction is being specified.
            if len(self.asCurInstr) == 1:
                self.setInstrunctionAttrib('sStats', asArgs[1]);
                self.setInstrunctionAttrib('sMnemonic', asArgs[1].split('_')[0]);

        # IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType)
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_HLP_DECODED_NL_1');
        if asArgs is not None:
            if len(self.asCurInstr) == 1:
                self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
                self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
                self.setInstrunctionArrayAttrib('asRawDisParams', 0, asArgs[3]);

        # IEMOP_HLP_DECODED_NL_2(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_uDisParam1, a_fDisOpType)
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_HLP_DECODED_NL_2');
        if asArgs is not None:
            if len(self.asCurInstr) == 1:
                self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
                self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
                self.setInstrunctionArrayAttrib('asRawDisParams', 0, asArgs[3]);
                self.setInstrunctionArrayAttrib('asRawDisParams', 1, asArgs[4]);

    return False;
1817
1818
def parse(self):
    """
    Parses the given file.

    Walks self.asLines, collecting multi-line comments and handing them to
    self.parseComment, handing code lines containing 'IEMOP_' to
    self.checkCodeForMacro, and completing the current instructions when a
    line starts with '}'.

    Returns the result of self.printErrors() (the number of errors).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.  Note that such lines are not checked
        # for macros or a leading '}'.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than indexing so an empty line is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
1880
1881
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    sSrcFile is the path of the file to read and sDefaultMap is passed on
    unchanged to the SimpleParser constructor.

    Returns the number of errors reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A ParserException
    from the parser is printed and then re-raised; any other exception
    propagates unchanged.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    # The with-statement closes the file whether or not reading succeeds.
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
1912
1913
def __parseAll():
    """
    Runs the parser over every listed IEMAllInstruction*.cpp.h file.

    The files are looked up in the directory containing this script, and the
    error counts of the individual files are added up.

    Returns True when no errors were counted.
    Raises exception on failure (including a non-zero total error count).
    """
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    # (default map name, source file name) pairs.
    atInputs = [
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    cTotalErrors = sum(__parseFileByName(os.path.join(sScriptDir, sFilename), sMap)
                       for sMap, sFilename in atInputs);

    if cTotalErrors == 0:
        return True;
    raise Exception('%d parse errors' % (cTotalErrors,));
1931
1932
1933
# Parse the instruction sources as soon as this module is loaded.  The call is
# not guarded, so it runs both when the file is executed as a script and when
# it is imported as a module; __parseAll raises if any parse errors were found.
__parseAll();
1935
1936

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65828

Download in other formats: