IEMAllInstructionsPython.py@ 65834

Last change on this file since 65834 was 65834, checked in by vboxsync, 8 years ago
IEMAllInstructionsPython.py: some more tinkering.
Property svn:eol-style set to `LF` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 80.1 KB

Line
1	#!/usr/bin/env python
2	# -- coding: utf-8 --
3	# $Id: IEMAllInstructionsPython.py 65834 2017-02-21 16:21:36Z vboxsync $
4
5	"""
6	IEM instruction extractor.
7
8	This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9	collects information about the instructions. It can then be used to generate
10	disassembler tables and tests.
11	"""
12
13	__copyright__ = \
14	"""
15	Copyright (C) 2017 Oracle Corporation
16
17	This file is part of VirtualBox Open Source Edition (OSE), as
18	available from http://www.virtualbox.org. This file is free software;
19	you can redistribute it and/or modify it under the terms of the GNU
20	General Public License (GPL) as published by the Free Software
21	Foundation, in version 2 as it comes in the "COPYING" file of the
22	VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23	hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25	The contents of this file may alternatively be used under the terms
26	of the Common Development and Distribution License Version 1.0
27	(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28	VirtualBox OSE distribution, in which case the provisions of the
29	CDDL are applicable instead of those of the GPL.
30
31	You may elect to license modified versions of this file under the
32	terms and conditions of either the GPL or the CDDL or both.
33	"""
34	__version__ = "$Revision: 65834 $"
35
36	# Standard python imports.
37	import os
38	import re
39	import sys
40
# Only the main script needs to modify the path.  The ValidationKit directory
# lives three directory levels above this file; make its modules importable.
g_ksValidationKitDir = os.path.normpath(os.path.join(os.path.abspath(__file__),
                                                     os.pardir, os.pardir, os.pardir,
                                                     'ValidationKit'));
sys.path.append(g_ksValidationKitDir);
45
46	from common import utils;
47
# Python 3 hacks:
# Python 3 has no separate 'long' type, so alias it to 'int' there.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
51
52
53	# Annotation example:
54	#
55	# \@opmnemonic add
56	# \@op1 reg:Eb
57	# \@op2 rm:Gb
58	# \@opmaps onebyte
59	# \@oppfx none
60	# \@opcode 0x00
61	# \@openc ModR/M
62	# \@opfltest none
63	# \@opflmodify of,sz,zf,af,pf,cf
64	# \@opflundef none
65	# \@opflset none
66	# \@opflclear none
67	# \@ophints harmless
68	# \@opstats add_Eb_Gb
69	# \@opgroup op_gen_arith_bin
70	# \@optest in1=1 in2=1 -> out1=2 outfl=a?,p?
71	# \@optest oppfx:o32 in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
72
73
## X86_EFL_XXX constant names mapped to their values.
## Single flags are written as bit shifts, mirroring the RT_BIT_32(n) form.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
96
97
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is an opcode byte written as '0x' followed by
    exactly two lower case hexadecimal digits (e.g. '0x0f').
    Returns True/False.
    """
    sHexDigits = '0123456789abcdef';
    return     len(sOpcode) == 4 \
           and sOpcode[:2] == '0x' \
           and sOpcode[2] in sHexDigits \
           and sOpcode[3] in sHexDigits;
109
110
class InstructionMap(object):
    """
    Instruction map.

    An opcode map is identified by its lead opcode bytes (none for the one
    byte opcode map).  An instruction may be a member of several opcode maps
    as long as it uses the same opcode value within each of them (because of
    VEX).
    """

    ## Valid encodings (dictionary used for lookup; values are dummies).
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Valid member selectors (dictionary used for lookup; values are dummies).
    kdSelectors = {
        'byte':     [], ##< next opcode byte selects the instruction (default).
        '/r':       [], ##< modrm.reg selects the instruction.
        'mod /r':   [], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy'):
        # Sanity check the input; the lead opcodes must look like '0x0f'.
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        assert all(_isValidOpcodeByte(sOpcode) for sOpcode in asLeadOpcodes);

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
152
153
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes; # Normal sizes (bytes), ascending.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        The value is a decimal or hexadecimal ('0x' prefixed) integer with an
        optional leading '+' or '-'.  Negative values are returned in two's
        complement form.  The bytes are in little endian order.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  (This base
        class always returns the first form.)

        Raises BadValue if invalid value.
        """
        if len(sValue) == 0:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used rather than long() as it
        # promotes to long automatically on python 2 and exists on python 3.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Work out the minimum number of bytes.  A negative value needs room
        # for the sign bit so that it can be sign extended correctly later.
        if iValue >= 0:
            cBits = iValue.bit_length();
        else:
            cBits = (-iValue - 1).bit_length() + 1;
        cbNeeded = max((cBits + 7) // 8, 1);

        # Round up to the nearest normal size, or to a multiple of the largest
        # one if the value is bigger than that.
        for cbValue in self.acbSizes:
            if cbNeeded <= cbValue:
                break;
        else:
            cbMax   = self.acbSizes[-1];
            cbValue = ((cbNeeded + cbMax - 1) // cbMax) * cbMax;

        # Convert to little endian bytearray (two's complement for negative values).
        uValue  = iValue & ((1 << (cbValue * 8)) - 1);
        abValue = bytearray([(uValue >> (ib * 8)) & 0xff for ib in range(cbValue)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
249
250
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of flag mnemonics which are looked up
    in SimpleParser.kdEFlags (defined elsewhere in this file).  A mnemonic
    mapping to a constant name prefixed by '!' requests the flag be cleared.
    """

    ## Mnemonics meaning 'flag is zero' (dictionary used for lookup; values are dummies).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list to byte values.

        Returns ((fSignExtend, bytearray),) holding the bits to set when no
        flag is to be cleared, otherwise an AND+OR pair where the first entry
        is the AND mask (inverted clear bits) and the second the bits to OR in.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = SimpleParser.kdEFlags.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            # ~fClear is negative, so this formats as '-0x...' and requires
            # TestType.get to render negative values as two's complement.
            aoClear = TestType.get(self, '%#x' % (~fClear));
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains one of the 'flag is zero' mnemonics and
        therefore yields an AND+OR pair.

        Whole mnemonics are compared; substring matching would mistake e.g.
        'iopl' for 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
298
299
300
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int',  fUnsigned = False),   # Signed: sign extend by default.
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:     ( default type, tbd, )
        # Operands.
        'op1':      ( 'uint', '', ), ## \@op1
        'op2':      ( 'uint', '', ), ## \@op2
        'op3':      ( 'uint', '', ), ## \@op3
        'op4':      ( 'uint', '', ), ## \@op4
        # Flags.
        'efl':      ( 'efl',  '', ),
        # 8-bit GPRs.
        'al':       ( 'uint', '', ), 'cl':       ( 'uint', '', ), 'dl':       ( 'uint', '', ), 'bl':       ( 'uint', '', ),
        'ah':       ( 'uint', '', ), 'ch':       ( 'uint', '', ), 'dh':       ( 'uint', '', ), 'bh':       ( 'uint', '', ),
        'r8l':      ( 'uint', '', ), 'r9l':      ( 'uint', '', ), 'r10l':     ( 'uint', '', ), 'r11l':     ( 'uint', '', ),
        'r12l':     ( 'uint', '', ), 'r13l':     ( 'uint', '', ), 'r14l':     ( 'uint', '', ), 'r15l':     ( 'uint', '', ),
        # 16-bit GPRs.
        'ax':       ( 'uint', '', ), 'dx':       ( 'uint', '', ), 'cx':       ( 'uint', '', ), 'bx':       ( 'uint', '', ),
        'sp':       ( 'uint', '', ), 'bp':       ( 'uint', '', ), 'si':       ( 'uint', '', ), 'di':       ( 'uint', '', ),
        'r8w':      ( 'uint', '', ), 'r9w':      ( 'uint', '', ), 'r10w':     ( 'uint', '', ), 'r11w':     ( 'uint', '', ),
        'r12w':     ( 'uint', '', ), 'r13w':     ( 'uint', '', ), 'r14w':     ( 'uint', '', ), 'r15w':     ( 'uint', '', ),
        # 32-bit GPRs.
        'eax':      ( 'uint', '', ), 'edx':      ( 'uint', '', ), 'ecx':      ( 'uint', '', ), 'ebx':      ( 'uint', '', ),
        'esp':      ( 'uint', '', ), 'ebp':      ( 'uint', '', ), 'esi':      ( 'uint', '', ), 'edi':      ( 'uint', '', ),
        'r8d':      ( 'uint', '', ), 'r9d':      ( 'uint', '', ), 'r10d':     ( 'uint', '', ), 'r11d':     ( 'uint', '', ),
        'r12d':     ( 'uint', '', ), 'r13d':     ( 'uint', '', ), 'r14d':     ( 'uint', '', ), 'r15d':     ( 'uint', '', ),
        # 64-bit GPRs.
        'rax':      ( 'uint', '', ), 'rdx':      ( 'uint', '', ), 'rcx':      ( 'uint', '', ), 'rbx':      ( 'uint', '', ),
        'rsp':      ( 'uint', '', ), 'rbp':      ( 'uint', '', ), 'rsi':      ( 'uint', '', ), 'rdi':      ( 'uint', '', ),
        'r8':       ( 'uint', '', ), 'r9':       ( 'uint', '', ), 'r10':      ( 'uint', '', ), 'r11':      ( 'uint', '', ),
        'r12':      ( 'uint', '', ), 'r13':      ( 'uint', '', ), 'r14':      ( 'uint', '', ), 'r15':      ( 'uint', '', ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':   ( 'uint', '', ), 'oz.rdx':   ( 'uint', '', ), 'oz.rcx':   ( 'uint', '', ), 'oz.rbx':   ( 'uint', '', ),
        'oz.rsp':   ( 'uint', '', ), 'oz.rbp':   ( 'uint', '', ), 'oz.rsi':   ( 'uint', '', ), 'oz.rdi':   ( 'uint', '', ),
        'oz.r8':    ( 'uint', '', ), 'oz.r9':    ( 'uint', '', ), 'oz.r10':   ( 'uint', '', ), 'oz.r11':   ( 'uint', '', ),
        'oz.r12':   ( 'uint', '', ), 'oz.r13':   ( 'uint', '', ), 'oz.r14':   ( 'uint', '', ), 'oz.r15':   ( 'uint', '', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;   ##< The field being modified, see kdFields.
        self.sOp    = sOp;      ##< The assignment operator, see kasOperators.
        self.sValue = sValue;   ##< The value as given in the test specification.
        self.sType  = sType;    ##< The value type name; presumably a kdTypes key (not checked here).
425
426
class TestSelector(object):
    """
    One selector for an instruction test: a 'variable op value' condition.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        # All three parts must be known to the tables above.
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
503
504
class InstructionTest(object):
    """
    Instruction test: the inputs, expected outputs and selectors of one test
    belonging to an instruction.
    """

    def __init__(self, oInstr): # type: (Instruction) -> None
        self.oInstr = oInstr;       # type: Instruction
        self.aoInputs, self.aoOutputs = [], [];
        self.aoSelectors = [];      # type: list(TestSelector)
515
516
class Operand(object):
    """
    Instruction operand: where it is encoded and what type it has.
    """

    ## \@op[1-4] locations (dictionary used for lookup; values are dummies).
    kdLocations = {
        'reg':  [], ## modrm.reg
        'rm':   [], ## modrm.rm
    };

    ## \@op[1-4] types (dictionary used for lookup; values are dummies).
    kdTypes = {
        'Eb':   [],
        'Gb':   [],
    };

    def __init__(self, sWhere, sType):
        # Only known locations and types are accepted.
        assert sWhere in self.kdLocations;
        assert sType in self.kdTypes;
        self.sWhere, self.sType = sWhere, sType;    ##< kdLocations key and kdTypes key respectively.
539
540
541	class Instruction(object):
542	"""
543	Instruction.
544	"""
545
546	def __init__(self, sSrcFile, iLine):
547	## @name Core attributes.
548	## @{
549	self.sMnemonic = None;
550	self.sBrief = None;
551	self.asDescSections = []; # type: list(str)
552	self.aoMaps = []; # type: list(InstructionMap)
553	self.aoOperands = []; # type: list(Operand)
554	self.sPrefix = None; ##< Single prefix: None, 0x66, 0xf3, 0xf2
555	self.sOpcode = None;
556	self.sEncoding = None;
557	self.asFlTest = None;
558	self.asFlModify = None;
559	self.asFlUndefined = None;
560	self.asFlSet = None;
561	self.asFlClear = None;
562	self.dHints = {}; ##< Dictionary of instruction hints, flags, whatnot. (Dictioarny for speed; dummy value).
563	self.asCpuIds = []; ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
564	self.asReqFeatures = []; ##< Which features are required to be enabled to run this instruction.
565	self.aoTests = []; # type: list(InstructionTest)
566	self.oCpus = None; ##< Some CPU restriction expression...
567	self.sGroup = None;
568	self.fUnused = False; ##< Unused instruction.
569	self.fInvalid = False; ##< Invalid instruction (like UD2).
570	self.sInvalidStyle = None; ##< Invalid behviour style
571	## @}
572
573	## @name Implementation attributes.
574	## @{
575	self.sStats = None;
576	self.sFunction = None;
577	self.fStub = False;
578	self.fUdStub = False;
579	## @}
580
581	## @name Decoding info
582	## @{
583	self.sSrcFile = sSrcFile;
584	self.iLineCreated = iLine;
585	self.iLineCompleted = None;
586	self.cOpTags = 0;
587	## @}
588
589	## @name Intermediate input fields.
590	## @{
591	self.sRawDisOpNo = None;
592	self.asRawDisParams = [];
593	self.sRawIemOpFlags = None;
594	self.sRawOldOpcodes = None;
595	## @}
596
597
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## Instruction maps.
## Each map is keyed by its own name.  Group maps are selected by ModR/M bits
## (see InstructionMap.kdSelectors) following the listed lead opcode bytes.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 (0x80): selected by modrm.reg like every other group below.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',]),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
661
662
663
class ParserException(Exception):
    """ Parser exception; carries a single, already formatted message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
668
669
670	class SimpleParser(object):
671	"""
672	Parser of IEMAllInstruction*.cpp.h instruction specifications.
673	"""
674
675	## @name Parser state.
676	## @{
677	kiCode = 0;
678	kiCommentMulti = 1;
679	## @}
680
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse
    and sDefaultMap the g_dInstructionMaps key of the map which instructions
    without an explicit map are added to.
    """
    assert sDefaultMap in g_dInstructionMaps;

    # Input and parser state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.asCurInstr     = [];
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Validation expressions.  Macro, mnemonic and stats names are all plain
    # C identifiers.
    sReIdentifier = '^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sReIdentifier);
    self.oReMnemonic    = re.compile(sReIdentifier);
    self.oReStatsName   = re.compile(sReIdentifier);
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.fDebug         = True;

    # Tag name to handler method dispatch table.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };

    # Accumulated error messages, see error() and errorComment().
    self.asErrors       = [];
734
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number.
    """
    sLocation = "%s:%d" % (self.sSrcFile, self.iLine,);
    raise ParserException("%s: error: %s" % (sLocation, sMessage,));
740
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is the start line
    of the current comment (self.iCommentLine) plus iLineInComment.
    """
    sLocation = "%s:%d" % (self.sSrcFile, self.iCommentLine + iLineInComment,);
    raise ParserException("%s: error: %s" % (sLocation, sMessage,));
746
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Returns False so callers can 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
754
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported line
    number is self.iCommentLine + iLineInComment.
    Returns False so callers can 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
762
def printErrors(self):
    """
    Writes all recorded errors to stderr (nothing is written if there are none).
    Returns number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
771
def debug(self, sMessage):
    """
    Prints sMessage prefixed by 'debug: ' if self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
777
778
def addInstruction(self, iLine = None):
    """
    Creates a new instruction, created on iLine (the current line if None),
    and adds it to both the global list and the current instruction list.
    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oInstr = Instruction(self.sSrcFile, iLine);
    for aoList in (g_aoAllInstructions, self.asCurInstr):
        aoList.append(oInstr);
    return oInstr;
787
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes one instruction: marks it as completed on iLine, applies the
    default map if it has none and registers it with all its maps.
    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Placeholder: Neither specified instructions (cOpTags > 0, e.g. ones
    # still lacking sStats) nor unspecified legacy stuff get any further
    # processing yet.  For the latter we generally only got a few things to
    # go on:
    #       /** Opcode 0x0f 0x00 /0. */
    #       FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    # (sRawOldOpcodes and sMnemonic are the candidates.)
    #

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
823
def doneInstructions(self, iLineInComment = None):
    """
    Completes all current instructions and resets the current instruction
    list and comment.  The completion line is the current line, or
    self.iCommentLine + iLineInComment when the latter is given.
    Returns True.
    """
    iLineDone = self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment;
    for oInstr in self.asCurInstr:
        self.doneInstructionOne(oInstr, iLineDone);
        self.cTotalStubs += 1 if oInstr.fStub else 0;
    self.cTotalInstr += len(self.asCurInstr);

    # Start afresh.
    self.asCurInstr = [];
    self.sComment   = '';
    return True;
838
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.  Unless
    fOverwrite is True, only attributes currently holding None are replaced.
    """
    for oInstr in self.asCurInstr:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
850
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, growing the array with None entries as needed.
    Unless fOverwrite is True, only entries currently holding None are replaced.
    """
    for oInstr in self.asCurInstr:
        aoArray = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
862
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex byte, everything after 'Opcode' is recorded as the sRawOldOpcodes
    attribute of the current instructions (where not already set), with any
    upper case '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and (   asWords[1].startswith('0x')
           or asWords[1].startswith('0X')):
        # Drop the 'Opcode' word and keep the opcode bytes and modifiers.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
878
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed,
    adding one (created on self.iCommentLine + iTagLine) if there is none.

    Counts the tag against every current instruction; an instruction seeing
    its first tag also bumps self.cTotalTagged.

    Returns the last of the current instructions.
    """
    if not self.asCurInstr:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oInstr in self.asCurInstr:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;
    return self.asCurInstr[-1];
888
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each section is a list of lines; the lines are stripped and joined by
    single spaces.  Empty sections are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if len(asLines) > 0];
900
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    Each section is a list of lines; the stripped lines of a section are
    joined by sLineSep and the sections by sSectionSep.  Empty sections are
    skipped and never contribute a separator, so the result neither starts
    nor ends with sSectionSep.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections if len(asLines) > 0]);
918
919
920
921	## @name Tag parsers
922	## @{
923
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    terminating dot is added if missing.  On success it is stored in the
    sBrief attribute of the instruction.

    Returns True on success, False (after recording the error) on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if len(sBrief) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot followed by a space, i.e. a sentence end.  If
    # that isn't the final character there is more than one sentence.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
953
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Instruction description text.  Each flattened section is appended to
    the description sections of the current instruction.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Nothing to add unless we have both instructions and section data.
    if len(self.aoInstructions) == 0 or len(aasSections) == 0:
        return True;

    asFlattened = self.flattenSections(aasSections);
    oInstr.asDescSections.extend(asFlattened);
    return True;
968
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        return self.errorComment(iTagLine, '%s: invalid mnemonic name: "%s"' % (sTag, sMnemonic,));

    # Set it, refusing to overwrite an existing mnemonic.
    if oInstr.sMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite mnemonic "%s" with "%s"'
                                           % (sTag, oInstr.sMnemonic, sMnemonic,));
    oInstr.sMnemonic = sMnemonic;

    _ = iEndLine;
    return True;
991
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  where:type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  See Operand.kdLocations for a list.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.
    See Operand.kdTypes for a list.

    The operand index is taken from the last character of the tag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) != 2:
        return self.errorComment(iTagLine, 'expected %s value on format "<where>:<type>" not "%s"' % (sTag, sFlattened,));

    (sWhere, sType) = asSplit;
    if sWhere not in Operand.kdLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(Operand.kdLocations.keys()),));

    if sType not in Operand.kdTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(Operand.kdTypes.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  The list
    # is padded with None so operands may be specified in any order.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1036
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Map names are looked up in g_dInstructionMaps.  Whitespace around the
    individual names is ignored.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    # Note: str.split(',') never returns an empty list, so the missing value
    #       case must be detected on the flattened string itself.
    if len(sFlattened.strip()) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this tag value: report it, but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1071
## \@oppfx values.
# The keys are the prefix byte values accepted by parseTagOpPfx (in addition
# to 'none').  Only key membership is tested there; the list values are not
# used by the visible code.
kdPrefixes = {
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
1078
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  none\|0x66\|0xf3\|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    A two character value is taken to be a hex byte lacking the '0x'
    prefix.  The value 'none' is stored as None.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if len(asPrefixes) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # Reject anything that is NOT a valid byte value.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in self.kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, self.kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1114
## Special \@opcode tag values.
# These are the non-byte values parseTagOpcode accepts as-is.  Only key
# membership is tested there; the list values are not used by the visible
# code.
kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
1124
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? \| /reg \| mr/reg \| 11 /reg \| !11 /reg \| 11 mr/reg \| !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value is either one of the special strings or a plain opcode byte.
    sOpcode = self.flattenAllSections(aasSections);
    fSpecial = sOpcode in self.kdSpecialOpcodes;
    if not fSpecial and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Refuse to replace an opcode that has already been set.
    sOldOpcode = oInstr.sOpcode;
    if sOldOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldOpcode, sOpcode,));

    oInstr.sOpcode = sOpcode;
    return True;
1148
## Valid values for \@openc
# parseTagOpEnc tests key membership only; the list values are not used by
# the visible code.
kdEncodings = {
    'ModR/M': [],
};
1153
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M\|TBD

    The instruction operand encoding style.  The value must be one of the
    keys in kdEncodings.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  Only the known encoding names are
    # acceptable; an opcode byte value is not an encoding.
    sEncoding = self.flattenAllSections(aasSections);
    if sEncoding not in self.kdEncodings:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
1178
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
# Maps the flag name to the X86_EFL_XXX constant name.  A leading '!' on the
# constant name marks the debugger notation for the flag being clear.
kdEFlags = {
    # Debugger flag notation:
    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    # PF is set when the result has even parity, so 'pe' is the set form.
    'pe':   'X86_EFL_PF',   ##< Parity Even.
    'po':   '!X86_EFL_PF',  ##< Parity Odd.

    'cf':   'X86_EFL_CF',   ##< Carry Flag.  (Same in the reference manual notation.)
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'tf':   'X86_EFL_TF',   ##< Trap flag.

    # Reference manual notation ('cf' is listed above):
    'of':   'X86_EFL_OF',
    'sf':   'X86_EFL_SF',
    'zf':   'X86_EFL_ZF',
    'pf':   'X86_EFL_PF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
1225
## EFlags tag to Instruction attribute name.
# parseTagOpEFlags uses this to pick the instruction attribute to read and
# set (via getattr/setattr) for the tag being parsed.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
1234
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of kdEFlags names, or 'none' for an
    empty list.  The list is stored in the instruction attribute that
    kdOpFlagToAttr associates with the tag.

    Returns True on success.  On failure the result of self.errorComment()
    is returned, except for invalid flag names where False is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten using commas everywhere and split the result up again.
    sValue = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sValue.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];

    # Validate the flags, normalizing those that only need stripping.
    fOkay = True;
    for iFlag in range(len(asFlags)):
        sFlag = asFlags[iFlag];
        if sFlag in self.kdEFlags:
            continue;
        sStripped = sFlag.strip();
        if sStripped in self.kdEFlags:
            asFlags[iFlag] = sStripped;
            continue;
        fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
    if not fOkay:
        return False;

    # Store them, unless the attribute has been set already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
1266
## \@ophints values.
# Maps the hint name to a DISOPTYPE_XXX constant name.  parseTagOpHints only
# tests key membership.
kdHints = {
    'invalid': 'DISOPTYPE_INVALID',
    'harmless': 'DISOPTYPE_HARMLESS',
    'controlflow': 'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous': 'DISOPTYPE_DANGEROUS',
    'portio': 'DISOPTYPE_PORTIO',
    'privileged': 'DISOPTYPE_PRIVILEGED',
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt': 'DISOPTYPE_INTERRUPT',
    'illegal': 'DISOPTYPE_ILLEGAL',
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read': 'DISOPTYPE_PORTIO_READ',
    'portio_write': 'DISOPTYPE_PORTIO_WRITE',
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode.
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode.
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size.
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes.
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte.
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b.
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes (only in 16 & 32 bits mode!).
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
};
1298
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.  The single value
    'none' gives an empty list.  Valid hints are the keys of kdHints.

    Returns True on success and False if any of the hints are invalid.
    Duplicate hints are reported, but do not cause failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() without arguments never yields items with leading or
    #       trailing whitespace, so no further stripping is required.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in self.kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1332
## \@opcpuid
# Maps the CPUID feature name used in the tag value to the corresponding
# X86_CPUID_XXX feature bit constant name.
kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    # Each name below maps to its own feature bit, not the neighbouring one
    # (DTES64, CPLDS and TM2 are different features).
    'pclmul': 'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor': 'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse41': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
1376
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none \| <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.
    The value is a comma or space separated list of kdCpuIdFlags names,
    where the single value 'none' gives an empty list.

    Returns True on success and False if any of the values are invalid.
    Duplicates are reported, but do not cause failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() without arguments never yields items with leading or
    #       trailing whitespace, so no further stripping is required.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in self.kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1410
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name is expected and it must
    match self.oReGroupName.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must consist of a single word.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));

    # ... and that word must be a valid group name.
    sNewGroup = asWords[0];
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Refuse to replace a group that has already been set.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
1436
## \@opunused, \@opinvalid, \@opinvlstyle
# The keys are the valid invalid-opcode behaviour styles.  The list values
# are not used by the visible code.
kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
1445
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Valid styles are the keys of kdInvalidStyles.  Only one of the three
    tags may be given for an instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behaviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in self.kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, ', '.join(self.kdInvalidStyles.keys()),));

    # Set it.
    if oInstr.sInvlStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvlStyle, sStyle,));
    oInstr.sInvlStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
1483
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as a separate test.  Tests that
    parse cleanly are appended to the aoTests list of the instruction.

    Always returns True; problems are reported via self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if len(sFlatSection) == 0:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs come last, then the
        # inputs in front of '->' and finally the selectors in front of '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  These checks must be done on list
            # identity, as two different lists with equal content (two empty
            # ones for instance) would otherwise be mistaken for each other.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occur once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occur once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            # The type defaults to the first entry for the field when not given.
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    # NOTE(review): the operator tested here is '=' to agree with the
                                    #               error message below; confirm against TestInOut.kasOperators.
                                    if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '=':
                                        oItem = TestInOut(sField, sOp, sValue, sType);
                                    else:
                                        self.errorComment(iTagLine,
                                                          '%s: and-or %s value "%s" in "%s" can only be used with the "=" (type: %s)'
                                                          % ( sTag, sDesc, sValue, sItem, sType, ));
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s)'
                                                                % ( sTag, sDesc, sValue, sItem, sType, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                            % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                                        % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
1623
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    # Set it, refusing to overwrite an existing function name.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;

    _ = iEndLine;
    return True;
1651
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sStats, self.oReStatsName.pattern));

    # Set it, refusing to overwrite an existing statistics name.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;

    _ = iEndLine;
    return True;
1679
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or that of
    self.errorComment() if the tag was given a value.
    """
    _ = iEndLine;

    # The tag must not come with any text.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
1692
1693	## @}
1694
1695
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are ignored.  Otherwise
    the comment is split into lines and then into tags, each with one or
    more sections of text, and each tag is passed on to its handler in
    self.dTagHandlers.  Text preceding the first tag is associated with the
    pseudo tag '@default'.

    Returns False if the comment was ignored, True if it was parsed.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines, accounting for the
    # leading ones in the comment line number.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    cLeadingEmpty = 0;
    while cLeadingEmpty < len(asLines) and not asLines[cLeadingEmpty]:
        cLeadingEmpty += 1;
    self.iCommentLine += cLeadingEmpty;
    del asLines[:cLeadingEmpty];

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Collect the tagged data first.  Each entry is a tuple on the form:
    #   (sTag, aasSections, iTagLine, iEndLine)
    # The last section list of the current tag is the one receiving text.
    #
    atTags       = [];
    sCurTag      = '@default';
    iCurTagLine  = 0;
    aasSections  = [ [], ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            # A new tag completes the previous one.
            atTags.append((sCurTag, aasSections, iCurTagLine, iLine));
            asSplit     = sLine.split(None, 1);
            sCurTag     = asSplit[0].lower();
            aasSections = [ asSplit[1:], ];     # Either empty or holding the text following the tag.
            iCurTagLine = iLine;
        elif sLine:
            aasSections[-1].append(sLine);
        elif aasSections[-1]:
            # An empty line starts a new section, unless the current one is still empty.
            aasSections.append([]);
    atTags.append((sCurTag, aasSections, iCurTagLine, len(asLines) - 1));

    #
    # Now process the tags in the order they were found.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    for sTag, aasTagSections, iTagLine, iEndLine in atTags:
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            cOpTags += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            sFlatDefault = self.flattenAllSections(aasTagSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and len(sFlatDefault) > 0:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
1788
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    The invocation may continue on the following source lines
    (self.asLines starting at self.iLine), which are then appended to
    sInvocation as needed.  Parentheses nest, and only commas at the
    outermost level separate arguments.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # NOTE(review): this returns the self.error() result rather than the
        #               documented tuple; confirm what the callers expect.
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more lines when running out of text.  This loops so that
        # empty continuation lines cannot cause indexing beyond the end.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                # Report it and return what we have got, so that the caller
                # still gets the documented tuple back.
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only separators at the outermost level terminate an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
1830
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if the next non-blank character is an opening
    parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset made relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    if offHit >= 0:
        # Use lstrip + startswith instead of strip()[0]: indexing an empty
        # string raises IndexError when only whitespace (or nothing at all)
        # follows the macro name.
        if sCode[offHit + len(sMacro):].lstrip().startswith('('):
            offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
1840
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro against sCode, in the order given.

    Returns the findAndParseMacroInvocation result for the first name that
    produces an argument list, or (len(sCode), None) if none of them does.
    """
    for sCandidate in asMacro:
        offEnd, asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return (offEnd, asArgs);

    # None of the candidates matched.
    return (len(sCode), None);
1850
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    sCode is a piece of source text (the caller in this file passes a whole
    line).  Nothing is examined unless it contains an open parenthesis
    somewhere after the first character.

    Returns True when one of the FNIEMOP_* function definition/stub macros
    was recognized, False otherwise.
    """
    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        (_, asArgs) = self.findAndParseFirstMacroInvocation(sCode,
                                                            [ 'FNIEMOP_DEF',
                                                              'FNIEMOP_STUB',
                                                              'FNIEMOP_STUB_1',
                                                              'FNIEMOP_UD_STUB',
                                                              'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            # asArgs[0] is the macro name, asArgs[1] its first argument (the function name).
            sFunction = asArgs[1];

            # No instruction pending: add one and take the mnemonic from the
            # second underscore separated component of the function name.
            if len(self.asCurInstr) == 0:
                self.addInstruction().sMnemonic = sFunction.split('_')[1];
            self.setInstrunctionAttrib('sFunction', sFunction);
            # The stub flags are derived from the macro name itself.
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # For the stub macros the instruction(s) are completed right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions();
            return True;

        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            # Only applied when there is exactly one current instruction.
            if len(self.asCurInstr) == 1:
                self.setInstrunctionAttrib('sStats', asArgs[1]);
                # The mnemonic is whatever precedes the first underscore of the stats name.
                self.setInstrunctionAttrib('sMnemonic', asArgs[1].split('_')[0]);

        # IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType)
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_HLP_DECODED_NL_1');
        if asArgs is not None:
            # Only applied when there is exactly one current instruction.
            if len(self.asCurInstr) == 1:
                self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
                self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
                self.setInstrunctionArrayAttrib('asRawDisParams', 0, asArgs[3]);

        # IEMOP_HLP_DECODED_NL_2(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_uDisParam1, a_fDisOpType)
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_HLP_DECODED_NL_2');
        if asArgs is not None:
            # Same as the one parameter variant, plus the second disassembler parameter.
            if len(self.asCurInstr) == 1:
                self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
                self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
                self.setInstrunctionArrayAttrib('asRawDisParams', 0, asArgs[3]);
                self.setInstrunctionArrayAttrib('asRawDisParams', 1, asArgs[4]);

    return False;
1903
1904
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, collecting the text of
    /* ... */ comments and handing each completed one to parseComment(),
    while passing slash-free code lines containing 'IEMOP_' to
    checkCodeForMacro().

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        #
        # Lines without a slash cannot open or close a comment, so they are
        # either comment text or plain code.
        #
        if '/' not in sLine:
            if self.iState == self.kiCommentMulti:
                # Inside a multi-line comment, just accumulate.
                self.sComment += sLine;
            elif self.iState == self.kiCode:
                if 'IEMOP_' in sLine:
                    # Code line that may hold a relevant macro.
                    self.checkCodeForMacro(sLine);
                elif sLine[0] == '}':
                    # A '}' in the first position completes the instructions.
                    self.doneInstructions();
            continue;

        #
        # The line has a slash, scan it for comment start and end markers.
        #
        offLine = 0;
        cchLine = len(sLine);
        while offLine < cchLine:
            if self.iState == self.kiCode:
                offHit = sLine.find('/*', offLine); # only multiline comments for now.
                if offHit < 0:
                    break;
                self.sComment     = '';
                self.iCommentLine = self.iLine;
                self.iState       = self.kiCommentMulti;
                offLine = offHit + 2;

            elif self.iState == self.kiCommentMulti:
                offHit = sLine.find('*/', offLine);
                if offHit < 0:
                    # Comment continues on the next line, keep the rest of this one.
                    self.sComment += sLine[offLine:];
                    break;
                self.sComment += sLine[offLine:offHit];
                self.iState = self.kiCode;
                offLine = offHit + 2;
                self.parseComment();

            else:
                assert False;

    # Wrap up whatever is still pending and report.
    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
1966
1967
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  Exceptions from
    the parser propagate unchanged; a ParserException is printed first.
    """
    #
    # Slurp sSrcFile into a list of lines, closing the file in any case.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.  Only parser exceptions are echoed
    # before being passed on, everything else propagates untouched.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
1998
1999
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are looked up in the directory containing this script.

    Returns True on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default map, file name) pairs to process.
    atInputs = [
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    cErrors = 0;
    for sDefaultMap, sName in atInputs:
        sPath = os.path.join(sSrcDir, sName);
        cErrors += __parseFileByName(sPath, sDefaultMap);

    if cErrors == 0:
        return True;
    raise Exception('%d parse errors' % (cErrors,));
2017
2018
2019
# Parse the instruction sources at module level: there is no __main__ guard
# around this call, so it runs on import as well as on direct execution.
__parseAll();
2021
2022

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65834

Download in other formats: