IEMAllInstructionsPython.py@ 65805

Last change on this file since 65805 was 65805, checked in by vboxsync, 8 years ago
IEMAllInstructionsPython.py: Basics doxygen stuff done, some work still needed esp. with @optest and old style docs.
Property svn:eol-style set to `LF` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 69.4 KB

Line
1	#!/usr/bin/env python
2	# -*- coding: utf-8 -*-
3	# $Id: IEMAllInstructionsPython.py 65805 2017-02-17 23:49:51Z vboxsync $
4
5	"""
6	IEM instruction extractor.
7
8	This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9	collects information about the instructions. It can then be used to generate
10	disassembler tables and tests.
11	"""
12
13	__copyright__ = \
14	"""
15	Copyright (C) 2017 Oracle Corporation
16
17	This file is part of VirtualBox Open Source Edition (OSE), as
18	available from http://www.virtualbox.org. This file is free software;
19	you can redistribute it and/or modify it under the terms of the GNU
20	General Public License (GPL) as published by the Free Software
21	Foundation, in version 2 as it comes in the "COPYING" file of the
22	VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23	hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25	The contents of this file may alternatively be used under the terms
26	of the Common Development and Distribution License Version 1.0
27	(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28	VirtualBox OSE distribution, in which case the provisions of the
29	CDDL are applicable instead of those of the GPL.
30
31	You may elect to license modified versions of this file under the
32	terms and conditions of either the GPL or the CDDL or both.
33	"""
34	__version__ = "$Revision: 65805 $"
35
36	# Standard python imports.
37	import os
38	import re
39	import sys
40
41	# Only the main script needs to modify the path.
42	g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
43	'ValidationKit');
44	sys.path.append(g_ksValidationKitDir);
45
46	from common import utils;
47
48
49	# Annotation example:
50	#
51	# @opmnemonic add
52	# @op1 reg:Eb
53	# @op2 rm:Gb
54	# @opmaps onebyte
55	# @oppfx none
56	# @opcode 0x00
57	# @openc ModR/M
58	# @opfltest none
59	# @opflmodify of,sz,zf,af,pf,cf
60	# @opflundef none
61	# @opflset none
62	# @opflclear none
63	# @ophints harmless
64	# @opstats add_Eb_Gb
65	# @opgroup op_gen_arith_bin
66	# @optest in1=1 in2=1 -> out1=2 outfl=a?,p?
67	# @optest oppfx:o32 in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
68
69
70	def _isValidOpcodeByte(sOpcode):
71	"""
72	Checks if sOpcode is a valid lower case opcode byte.
73	Returns true/false.
74	"""
75	if len(sOpcode) == 4:
76	if sOpcode[:2] == '0x':
77	if sOpcode[2] in '0123456789abcdef':
78	if sOpcode[3] in '0123456789abcdef':
79	return True;
80	return False;
81
82
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys are used, the values are dummies.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Valid sSelector values.  Only the keys are used, the values are dummies.
    kdSelectors = {
        'byte':     [], ##< next opcode byte selects the instruction (default).
        '/r':       [], ##< modrm.reg selects the instruction.
        'mod /r':   [], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy'):
        """
        sName is the map name, asLeadOpcodes the optional list of lead opcode
        bytes ('0x0f' style strings), sSelector a kdSelectors key and
        sEncoding a kdEncodings key.  Invalid values trip assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        asLeadOpcodes = [] if asLeadOpcodes is None else asLeadOpcodes;
        assert all(_isValidOpcodeByte(sOpcode) for sOpcode in asLeadOpcodes);

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
124
125
class TestType(object):
    """
    Test value type.

    Currently this only records the type name and a to-do note.
    """
    def __init__(self, sName, sTodo):
        (self.sName, self.sTodo) = (sName, sTodo);
133
134
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    ## Note: the or-assign entry is the plain '|=' (no backslash), otherwise
    ##       the constructor assertion rejects the real operator.
    kasOperators = [
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## CPU context fields.  Only the keys are used, the values are dummies.
    kdFields = {
        'op1':  [], ## @op1
        'op2':  [], ## @op2
        'op3':  [], ## @op3
        'op4':  [], ## @op4

        'efl':  [],

        'al':   [],
        'cl':   [],
        'dl':   [],
        'bl':   [],
        'ah':   [],
        'ch':   [],
        'dh':   [],
        'bh':   [],

        'ax':   [],
        'dx':   [],
        'cx':   [],
        'bx':   [],
        'sp':   [],
        'bp':   [],
        'si':   [],
        'di':   [],

        'eax':  [],
        'edx':  [],
        'ecx':  [],
        'ebx':  [],
        'esp':  [],
        'ebp':  [],
        'esi':  [],
        'edi':  [],

        'rax':  [],
        'rdx':  [],
        'rcx':  [],
        'rbx':  [],
        'rsp':  [],
        'rbp':  [],
        'rsi':  [],
        'rdi':  [],
    };
    ## Types: name -> (size in bytes, signedness).
    ## NOTE(review): 'int' is listed as 'unsigned' just like 'uint'; this
    ##               looks like a copy & paste slip - confirm before relying on it.
    kdTypes = {
        'db':   (1, 'unsigned' ),
        'dw':   (2, 'unsigned' ),
        'dd':   (4, 'unsigned' ),
        'dq':   (8, 'unsigned' ),
        'uint': (8, 'unsigned' ),
        'int':  (8, 'unsigned' ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators (asserted).
        sValue and sType are stored as given without validation.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
211
212
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a '<variable> <op> <value>' condition, where the variable
    and its permitted values are given by kdVariables.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a kdVariables key, sOp one of kasCompareOps and
        sValue a valid value for that variable (all asserted).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        # The value table is indexed by the variable name, not by the value.
        assert sValue in self.kdVariables[sVariable];
        self.sVariable  = sVariable;
        self.sOp        = sOp;
        self.sValue     = sValue;
266
267
class InstructionTest(object):
    """
    Instruction test.

    Ties the input modifiers, output modifiers and selectors of one test to
    the instruction it belongs to.
    """

    def __init__(self, oInstr): # type: (InstructionTest, Instruction)
        self.aoSelectors    = [];       # type: list(TestSelector)
        self.aoOutputs      = [];
        self.aoInputs       = [];
        self.oInstr         = oInstr;   # type: Instruction
278
279
class Operand(object):
    """
    Instruction operand.

    Described by where it is encoded (kdLocations) and its type (kdTypes),
    as given by the @op1 thru @op4 tags.
    """

    ## @op[1-4] locations.  Only the keys are used, the values are dummies.
    kdLocations = {
        'reg':  [], ## modrm.reg
        'rm':   [], ## modrm.rm
    };

    ## @op[1-4] types.  Only the keys are used, the values are dummies.
    kdTypes = {
        'Eb':   [],
        'Gb':   [],
    };

    def __init__(self, sWhere, sType):
        # Both values must be known table keys.
        assert sWhere in self.kdLocations;
        assert sType in self.kdTypes;
        (self.sWhere, self.sType) = (sWhere, sType);   ##< kdLocations key, kdTypes key.
302
303
class Instruction(object):
    """
    Instruction.

    Plain data holder which the parser fills in as it works its way thru the
    tags and macros describing an instruction.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        ## @}

        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 0x66, 0xf3, 0xf2
        self.sOpcode        = None;
        self.sEncoding      = None;
        # The five EFLAGS lists, all unset to begin with.
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.oCpus          = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        ## @}
359
360
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## Instruction maps, indexed by map name.
## Each key appears exactly once ('three0f38' used to be listed twice), and
## all the group 1 maps, 0x80 included, are selected by modrm.reg.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',       asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',       asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',      asLeadOpcodes = ['0x0f',]),
    'grp6':         InstructionMap('grp6',       asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',       asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',       asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',      asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',      asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',      asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',      asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',      asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',      asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',     asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',       asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38',  asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a',  asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',    sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',   sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',   sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',   sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',   sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',   sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',    sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',    sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',    sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',    sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',   sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
424
425
426
class ParserException(Exception):
    """ Parser exception carrying a single, preformatted message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
431
432
433	class SimpleParser(object):
434	"""
435	Parser of IEMAllInstruction*.cpp.h instruction specifications.
436	"""
437
438	## @name Parser state.
439	## @{
440	kiCode = 0;
441	kiCommentMulti = 1;
442	## @}
443
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines of the
    file and sDefaultMap the name of the g_dInstructionMaps entry that
    instructions without any explicit map assignment end up in (asserted
    to exist).
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.asCurInstr     = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Value validation expressions (raw strings so backslashes stay literal).
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    # 'op_' + one word, optionally followed by one or two more '_' separated
    # words.  The alternation must use plain '|' characters, an escaped one
    # would only ever match a literal pipe in the group name.
    self.oReGroupName   = re.compile(r'^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.fDebug         = True;

    # Tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };

    # Errors collected by error() and errorComment().
    self.asErrors       = [];
497
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file and
    the current line number.
    """
    sFull = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFull);
503
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is the comment
    start line (self.iCommentLine) plus iLineInComment.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,));
509
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False so callers can 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
517
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment, i.e. line
    self.iCommentLine + iLineInComment, in self.asErrors.
    Always returns False so callers can 'return self.errorComment(...)'.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,));
    return False;
525
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr in one go.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
534
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix to stdout when self.fDebug is
    set, otherwise does nothing.
    """
    if self.fDebug:
        # Parenthesized single argument: gives the same output whether print
        # is a statement (python 2) or a function (python 3).
        print('debug: %s' % (sMessage,));
540
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global list and the
    list of current instructions.  The creation line defaults to the
    current line when iLine is None.
    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oInstr);
    self.asCurInstr.append(oInstr);
    return oInstr;
549
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction: records the completion line,
    applies the default map if none was assigned, and registers the
    instruction with each of its maps.
    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    if oInstr.cOpTags <= 0:
        #
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        # Nothing is derived from sRawOldOpcodes / sMnemonic yet.
        pass;
    elif oInstr.sStats is None:
        #
        # Specified instructions.  Nothing to fill in for these yet either.
        #
        pass;

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    sSummary = '%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags);
    self.debug(sSummary);
    return True;
585
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the current
    instruction list and comment.  The completion line is the current line,
    or self.iCommentLine + iLineInComment when the latter is given.
    Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.asCurInstr:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.asCurInstr);

    # Reset the state for the next instruction.
    self.asCurInstr = [];
    self.sComment   = '';
    return True;
600
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.
    Unless fOverwrite is True, only attributes currently holding None are
    replaced.
    """
    for oInstr in self.asCurInstr:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
612
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib of all current
    instructions to oValue, growing the list with None entries as needed.
    Unless fOverwrite is True, only entries currently holding None are
    replaced.
    """
    for oInstr in self.asCurInstr:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
624
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by
    a word starting with '0x' or '0X', everything after 'Opcode' is stored
    as sRawOldOpcodes on the current instructions (existing values are not
    overwritten).  A '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if len(asLines) > 0 else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        asOpcodes = [];
        for sWord in asWords[1:]:       # Skip the leading 'Opcode' word.
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
640
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed, creating
    one at the tag line if there are no current instructions.  Bumps the
    tag count of every current instruction, and the tagged-instruction
    total for each instruction getting its first tag.
    Returns the last of the current instructions.
    """
    if not self.asCurInstr:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oInstr in self.asCurInstr:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;
    return self.asCurInstr[-1];
650
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    aasSections is a list of sections, each a list of lines.  The lines of
    a section are stripped and joined with a single space; empty sections
    are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines])
            for asLines in aasSections
            if len(asLines) > 0];
662
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    aasSections is a list of sections, each a list of lines.  The lines are
    stripped and joined using sLineSep, the sections using sSectionSep.
    Empty sections are skipped entirely, so they never give rise to a
    leading (or doubled) section separator.

    Returns the flattened string, empty if there is nothing to flatten.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections
                             if len(asLines) > 0]);
680
681
682
683	## @name Tag parsers
684	## @{
685
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters (a
    trailing dot is added if missing) and can only be given once per
    instruction.  Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if len(sBrief) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot ending a sentence, i.e. one followed by a space
    # or sitting at the very end.  Finding it before the end means there is
    # more than one sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
715
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to the description sections of
    the instruction.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    # ensureInstructionForOpTag always hands back an instruction, so the
    # only thing left to check is whether there is anything to append.
    if len(aasSections) > 0:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));

    _ = sTag; _ = iEndLine;
    return True;
730
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The mnemonic can only be set once per instruction.  Returns True on
    success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check that it looks like an identifier.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to replace an already assigned mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
753
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: where:type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  See Operand.kdLocations for a list.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.
    See Operand.kdTypes for a list.

    An operand can only be set once per instruction.  Returns True on
    success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    # The last character of the tag is the one-based operand number.
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) != 2:
        return self.errorComment(iTagLine, 'expected %s value on format "<where>:<type>" not "%s"' % (sTag, sFlattened,));

    (sWhere, sType) = asSplit;
    if sWhere not in Operand.kdLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(sorted(Operand.kdLocations.keys())),));

    if sType not in Operand.kdTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(sorted(Operand.kdTypes.keys())),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
798
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Whitespace around the map names is ignored, as are empty list entries.
    Returns False after recording an error if the value is empty, names an
    unknown map or a map already assigned to the instruction; otherwise
    True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  The names are stripped and
    # empty entries dropped, since splitting an empty string still yields
    # one (empty) entry.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if len(sMap) > 0];
    if len(asMaps) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map given twice in the value: recorded, but not fatal.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
833
## @oppfx values.  Only the keys matter, each value is an unused (and
## separate) empty list.
kdPrefixes = dict((sPrefix, []) for sPrefix in ('0x66', '0xf3', '0xf2'));
840
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive and the '0x' may be omitted.  'none'
    results in a None prefix.  Returns True on success, False after
    recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if len(asPrefixes) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        # Accept the byte without the '0x' too.
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in self.kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)'
                                           % (sTag, sPrefix, ', '.join(sorted(self.kdPrefixes.keys())),));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
876
## Special @opcode tag values.  Only the keys matter, each value is an unused
## (and separate) empty list.
kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
886
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The opcode can only be set once per instruction.  Returns True on
    success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value; it must be one of the special values or a
    # lower case '0x??' byte.
    sOpcode = self.flattenAllSections(aasSections);
    if sOpcode not in self.kdSpecialOpcodes and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Refuse to replace an already assigned opcode.
    sOldOpcode = oInstr.sOpcode;
    if sOldOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldOpcode, sOpcode,));

    oInstr.sOpcode = sOpcode;
    return True;
910
## Valid values for @openc.  Only the keys matter, each value is an unused
## empty list.
kdEncodings = dict((sEncoding, []) for sEncoding in ('ModR/M',));
915
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|TBD

    The instruction operand encoding style.

    The encoding can only be set once per instruction.  Returns True on
    success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    # NOTE(review): besides the kdEncodings keys, anything looking like a
    #               '0x??' opcode byte is accepted here too - confirm that
    #               this is intentional.
    sEncoding = self.flattenAllSections(aasSections);
    if sEncoding not in self.kdEncodings and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to replace an already assigned encoding.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
940
## EFlags values allowed in @opfltest, @opflmodify, @opflundef, @opflset, and @opflclear.
## Maps the flag name to the X86_EFL_XXX constant name, with a leading '!'
## where the name denotes the flag being clear.
kdEFlags = {
    # Debugger flag notation:
    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLuss (sign).

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    # Note! PF is set when the parity is even, thus 'pe' is the set state.
    'po':   '!X86_EFL_PF',  ##< Parity Odd.
    'pe':   'X86_EFL_PF',   ##< Parity Even.

    'cy':   'X86_EFL_CF',   ##< CarrY.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'tf':   'X86_EFL_TF',   ##< Trap flag.

    # Reference manual notation ('af' is listed above):
    'of':   'X86_EFL_OF',
    'sf':   'X86_EFL_SF',
    'zf':   'X86_EFL_ZF',
    'cf':   'X86_EFL_CF',
    'pf':   'X86_EFL_PF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
987
988	## EFlags tag to Instruction attribute name.
989	kdOpFlagToAttr = {
990	'@opfltest': 'asFlTest',
991	'@opflmodify': 'asFlModify',
992	'@opflundef': 'asFlUndefined',
993	'@opflset': 'asFlSet',
994	'@opflclear': 'asFlClear',
995	};
996
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  none | comma separated list of kdEFlags keys.

    Stores the flag list in the instruction attribute that kdOpFlagToAttr
    associates with the tag.  Returns True on success, False or the
    self.errorComment() result on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Everything is joined with commas, so one split gives the individual values.
    sFlat   = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlat.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag in range(len(asFlags)):
            sRaw = asFlags[iFlag];
            if sRaw in self.kdEFlags:
                continue;
            # Retry without surrounding whitespace before giving up on the value.
            sTrimmed = sRaw.strip();
            if sTrimmed in self.kdEFlags:
                asFlags[iFlag] = sTrimmed;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRaw,));
        if not fOkay:
            return False;

    # Store the list, but only if the attribute has not been set already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asPrevious = getattr(oInstr, sAttrib);
    if asPrevious is None:
        setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asPrevious, asFlags,));
1024
## @ophints values.
## The keys are what parseTagOpHints() accepts; the values name DISOPTYPE_XXX constants.
kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode.
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode.
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size.
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes.
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte.
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b.
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes (only in 16 & 32 bits mode!).
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
};
1056
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Returns True when all values are valid (a duplicate is reported, but does
    not fail the tag), False when at least one value is not a kdHints key.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never yields values with leading or
    #       trailing whitespace, so no stripping retry is needed here.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in self.kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1090
## @opcpuid values.
## The keys are the CPUID feature specifiers; the values name X86_CPUID_XXX constants.
kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':   'X86_CPUID_FEATURE_ECX_DTES64',     # NOTE(review): value names DTES64 rather than PCLMUL - confirm against x86.h.
    'monitor':  'X86_CPUID_FEATURE_ECX_CPLDS',      # NOTE(review): value names CPLDS rather than MONITOR - confirm against x86.h.
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_TM2',        # NOTE(review): value names TM2 rather than SMX - confirm against x86.h.
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    # The values below use the X86_CPUID_AMD_FEATURE_ prefix.
    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
1134
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    Returns True when all values are valid (a duplicate is reported, but does
    not fail the tag), False when at least one value is not a kdCpuIdFlags key.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values
    # against the kdCpuIdFlags table.
    # Note! str.split() without arguments never yields values with leading or
    #       trailing whitespace, so no stripping retry is needed here.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in self.kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1168
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Returns True after storing the group name in the instruction, otherwise
    the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must consist of exactly one word ...
    asWords = self.flattenAllSections(aasSections).split();
    try:
        (sName,) = asWords;
    except ValueError:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));

    # ... which has to match the group name pattern.
    oMatch = self.oReGroupName.match(sName);
    if not oMatch:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sName, self.oReGroupName.pattern));

    # Store it unless a group has been given already.
    if oInstr.sGroup is None:
        oInstr.sGroup = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sName,));
1194
## @opunused, @opinvalid, @opinvlstyle
## The keys are the accepted style names; the list values are empty at this point.
kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
1203
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    # Validate against the same table that the error message lists as valid.
    if sStyle not in self.kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behviour style: %s (valid: %s)'
                                           % (sTag, sStyle, self.kdInvalidStyles.keys(),));
    # Set it.
    if oInstr.sInvlStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvlStyle, sStyle,));
    oInstr.sInvlStyle = sStyle;
    # Two of the three tags additionally set a flag on the instruction.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
1241
def validateTestInputValueByType(self, sType, sValue):
    """
    Validates the value given the type.

    Both arguments are currently ignored and every value is accepted.
    Returns True.
    """
    _ = (sType, sValue);
    return True;
1249
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optest
    Value:      [<selectors>[ ]/] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be to create multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is one test.  Tests that parse cleanly are appended to the
    aoTests list of the instruction, the others are reported thru
    self.errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if len(sFlatSection) == 0:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs after
        # '->' and finally selectors after '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  Identity tests are required here as
            # two of the lists compare equal whenever both are (still) empty.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % ( sTag, sFlatSection));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" must come before "->" and only once: %s'
                                                      % ( sTag, sFlatSection));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCond.find(sOp);
                if off >= 0:
                    sVariable = sCond[:off];
                    sValue    = sCond[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else 'int'; ## @todo figure the type handling...
                            if sType in TestInOut.kdTypes:
                                if self.validateTestInputValueByType(sType, sValue):
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                            % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                                        % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    # This is an input or output item, so name it as such rather than as a selector.
                    fRc = self.errorComment(iTagLine, '%s: failed to parse %s: %s' % ( sTag, sDesc, sItem,));

        #
        # Only keep tests which parsed without trouble.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
1379
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    # Set it, reporting the old and the new function name if there is one already.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;

    _ = iEndLine;
    return True;
1407
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sStats, self.oReStatsName.pattern));

    # Set it, refusing to silently replace an earlier value.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;

    _ = iEndLine;
    return True;
1435
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when the tag has no value,
    otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
1448
1449	## @}
1450
1451
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    Returns False if the comment contains neither 'Opcode' nor '@', True
    after it has been processed.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while len(asLines) > 0 and len(asLines[0]) == 0:
        self.iCommentLine += 1;
        asLines.pop(0);

    while len(asLines) > 0 and len(asLines[-1]) == 0:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if len(asLines) > 0 and asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    # A tag is processed when the next tag line shows up.  The None entry
    # appended to the line list acts as end marker, making sure that the
    # final tag of the comment is processed too.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines + [ None, ]):
        fEnd = sLine is None;
        if not fEnd and not sLine.startswith('@'):
            # Text belonging to the current tag; empty lines separate sections.
            if len(sLine) > 0:
                asCurSection.append(sLine);
            elif len(asCurSection) != 0:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # Process the previous tag.
        #
        if sCurTag in self.dTagHandlers:
            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);

        if fEnd:
            break;

        #
        # New tag.
        #
        asSplit = sLine.split(None, 1);
        sCurTag = asSplit[0].lower();
        if len(asSplit) > 1:
            asCurSection = [asSplit[1],];
        else:
            asCurSection = [];
        aasSections = [asCurSection, ];
        iCurTagLine = iLine;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
1533
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.

    On a parse error the problem is reported thru self.error() and the
    tuple (len(sInvocation), None) is returned, so that callers can always
    unpack the result.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): assumes the parser class provides a raiseError()
        #               method that raises - confirm.
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Append following source lines until there is a character at 'off'.
        # This is a loop so that an empty line entry cannot cause an index error.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (len(sInvocation), None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        # Arguments are only split at the outermost parenthesis level.
        if ch == ',' or ch == ')':
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
1575
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed by an opening parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.
    """
    offHit = sCode.find(sMacro);
    # Note! startswith() is used since nothing at all may follow the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        # The offset is relative to the hit, so make it relative to sCode.
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
1585
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order, stopping at the
    first one findAndParseMacroInvocation() finds in sCode.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.
    """
    for sCandidate in asMacro:
        (offEnd, asArgs) = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return (offEnd, asArgs);
    return (len(sCode), None);
1595
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if the code contains one of the decoder function definition
    or stub macros, False otherwise.
    """
    # No opening parenthesis beyond the first column, no macro invocation.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asDefMacros = [ 'FNIEMOP_DEF', 'FNIEMOP_STUB', 'FNIEMOP_STUB_1', 'FNIEMOP_UD_STUB', 'FNIEMOP_UD_STUB_1' ];
    (_, asArgs) = self.findAndParseFirstMacroInvocation(sCode, asDefMacros);
    if asArgs is not None:
        sMacro    = asArgs[0];
        sFunction = asArgs[1];
        fIsStub   = sMacro.find('STUB') > 0;

        # Without a pending instruction, one is added with the mnemonic taken
        # from the function name.
        if len(self.asCurInstr) == 0:
            self.addInstruction().sMnemonic = sFunction.split('_')[1];
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.asCurInstr) == 1:
        self.setInstrunctionAttrib('sStats', asArgs[1]);
        self.setInstrunctionAttrib('sMnemonic', asArgs[1].split('_')[0]);

    #
    # IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType)
    # IEMOP_HLP_DECODED_NL_2(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_uDisParam1, a_fDisOpType)
    #
    for sHlpMacro, cDisParams in ( ('IEMOP_HLP_DECODED_NL_1', 1), ('IEMOP_HLP_DECODED_NL_2', 2), ):
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, sHlpMacro);
        if asArgs is not None and len(self.asCurInstr) == 1:
            self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
            self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
            for iParam in range(cDisParams):
                self.setInstrunctionArrayAttrib('asRawDisParams', iParam, asArgs[3 + iParam]);

    return False;
1648
1649
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, switching between the code and
    multi-line comment states.  Completed comments go to parseComment(),
    code lines mentioning 'IEMOP_' to checkCodeForMacro(), and a line starting
    with '}' completes the current instructions.

    Returns number of errors (the result of self.printErrors()).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine  += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # Note! startswith() rather than sLine[0], so an empty line entry is harmless.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
1711
1712
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read; parser exceptions
    are passed on (a ParserException is printed first).
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Any other kind of exception simply propagates.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(unicode(oXcpt));
        raise;
1743
1744
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are looked up in the directory holding this script.

    Returns True if there were no parse errors.
    Raises exception on failure.
    """
    sSrcDir  = os.path.dirname(os.path.abspath(__file__));
    atInputs = [
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    # Add up the error counts of the individual files.
    cErrors = 0;
    for tInput in atInputs:
        sFullPath = os.path.join(sSrcDir, tInput[1]);
        cErrors = cErrors + __parseFileByName(sFullPath, tInput[0]);

    if cErrors == 0:
        return True;
    raise Exception('%d parse errors' % (cErrors,));
1762
1763
1764
# Parse the instruction files as soon as this module is loaded (imported or run).
__parseAll();
1766
1767

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65805

Download in other formats: