]> andersk Git - udis86.git/blame - libudis86/opgen.py
Use the public http URI to docbook.xsl.
[udis86.git] / libudis86 / opgen.py
CommitLineData
bbe45369 1#!/usr/bin/env python
2
3import os
4import sys
5import string
6from xml.dom import minidom
7
8#
9# opgen.py -- generates tables and constants for decoding
10#
11# - itab.c
12# - itab.h
13#
14
15#
16# special mnemonic types for internal purposes.
17#
# Special mnemonic types used internally by the decoder tables. They are
# appended after the real mnemonics when the ud_mnemonic_code enum is
# written to itab.h, so their order here fixes their enum order.
spl_mnm_types = [
    'd3vil',
    'na',
    'grp_reg',
    'grp_rm',
    'grp_vendor',
    'grp_x87',
    'grp_mode',
    'grp_osize',
    'grp_asize',
    'grp_mod',
    'grp_3byte',
    'none',
]
31
32#
33# opcode-vendor dictionary
34#
# Maps a <vendor> name from the XML to the two-digit slot index used
# inside a grp_vendor table.
vend_dict = {
    'AMD': '00',
    'INTEL': '01',
    'ANY': '02',
}
40
41
42#
43# opcode-mode dictionary
44#
45
# Maps the size suffix of an /O, /A or /M opcode selector (16, 32, 64) to
# the two-digit slot index used inside the matching group table.
mode_dict = {
    '16': '00',
    '32': '01',
    '64': '02',
}
51
52#
53# opcode-operand dictionary
54#
# Maps each operand code accepted in an <opcode> definition to the pair
# [operand-type constant, operand-size constant]. Every key is also emitted
# to itab.h as a "#define O_<key> { <type>, <size> }" macro.
operand_dict = {
    "Ap": ["OP_A", "SZ_P"],
    "E": ["OP_E", "SZ_NA"],
    "Eb": ["OP_E", "SZ_B"],
    "Ew": ["OP_E", "SZ_W"],
    "Ev": ["OP_E", "SZ_V"],
    "Ed": ["OP_E", "SZ_D"],
    "Eq": ["OP_E", "SZ_Q"],
    "Ez": ["OP_E", "SZ_Z"],
    "Ex": ["OP_E", "SZ_MDQ"],
    "Ep": ["OP_E", "SZ_P"],
    "G": ["OP_G", "SZ_NA"],
    "Gb": ["OP_G", "SZ_B"],
    "Gw": ["OP_G", "SZ_W"],
    "Gv": ["OP_G", "SZ_V"],
    "Gvw": ["OP_G", "SZ_MDQ"],
    "Gd": ["OP_G", "SZ_D"],
    "Gq": ["OP_G", "SZ_Q"],
    "Gx": ["OP_G", "SZ_MDQ"],
    "Gz": ["OP_G", "SZ_Z"],
    "M": ["OP_M", "SZ_NA"],
    "Mb": ["OP_M", "SZ_B"],
    "Mw": ["OP_M", "SZ_W"],
    "Ms": ["OP_M", "SZ_W"],
    "Md": ["OP_M", "SZ_D"],
    "Mq": ["OP_M", "SZ_Q"],
    "Mt": ["OP_M", "SZ_T"],
    "Mo": ["OP_M", "SZ_O"],
    "I1": ["OP_I1", "SZ_NA"],
    "I3": ["OP_I3", "SZ_NA"],
    "Ib": ["OP_I", "SZ_B"],
    "Isb": ["OP_I", "SZ_SB"],
    "Iw": ["OP_I", "SZ_W"],
    "Iv": ["OP_I", "SZ_V"],
    "Iz": ["OP_I", "SZ_Z"],
    "Jv": ["OP_J", "SZ_V"],
    "Jz": ["OP_J", "SZ_Z"],
    "Jb": ["OP_J", "SZ_B"],
    "R": ["OP_R", "SZ_RDQ"],
    "C": ["OP_C", "SZ_NA"],
    "D": ["OP_D", "SZ_NA"],
    "S": ["OP_S", "SZ_NA"],
    "Ob": ["OP_O", "SZ_B"],
    "Ow": ["OP_O", "SZ_W"],
    "Ov": ["OP_O", "SZ_V"],
    "V": ["OP_V", "SZ_NA"],
    "W": ["OP_W", "SZ_NA"],
    "P": ["OP_P", "SZ_NA"],
    "Q": ["OP_Q", "SZ_NA"],
    "VR": ["OP_VR", "SZ_NA"],
    "PR": ["OP_PR", "SZ_NA"],
    "AL": ["OP_AL", "SZ_NA"],
    "CL": ["OP_CL", "SZ_NA"],
    "DL": ["OP_DL", "SZ_NA"],
    "BL": ["OP_BL", "SZ_NA"],
    "AH": ["OP_AH", "SZ_NA"],
    "CH": ["OP_CH", "SZ_NA"],
    "DH": ["OP_DH", "SZ_NA"],
    "BH": ["OP_BH", "SZ_NA"],
    "AX": ["OP_AX", "SZ_NA"],
    "CX": ["OP_CX", "SZ_NA"],
    "DX": ["OP_DX", "SZ_NA"],
    "BX": ["OP_BX", "SZ_NA"],
    "SI": ["OP_SI", "SZ_NA"],
    "DI": ["OP_DI", "SZ_NA"],
    "SP": ["OP_SP", "SZ_NA"],
    "BP": ["OP_BP", "SZ_NA"],
    "eAX": ["OP_eAX", "SZ_NA"],
    "eCX": ["OP_eCX", "SZ_NA"],
    "eDX": ["OP_eDX", "SZ_NA"],
    "eBX": ["OP_eBX", "SZ_NA"],
    "eSI": ["OP_eSI", "SZ_NA"],
    "eDI": ["OP_eDI", "SZ_NA"],
    "eSP": ["OP_eSP", "SZ_NA"],
    "eBP": ["OP_eBP", "SZ_NA"],
    "rAX": ["OP_rAX", "SZ_NA"],
    "rCX": ["OP_rCX", "SZ_NA"],
    "rBX": ["OP_rBX", "SZ_NA"],
    "rDX": ["OP_rDX", "SZ_NA"],
    "rSI": ["OP_rSI", "SZ_NA"],
    "rDI": ["OP_rDI", "SZ_NA"],
    "rSP": ["OP_rSP", "SZ_NA"],
    "rBP": ["OP_rBP", "SZ_NA"],
    "ES": ["OP_ES", "SZ_NA"],
    "CS": ["OP_CS", "SZ_NA"],
    "DS": ["OP_DS", "SZ_NA"],
    "SS": ["OP_SS", "SZ_NA"],
    "GS": ["OP_GS", "SZ_NA"],
    "FS": ["OP_FS", "SZ_NA"],
    "ST0": ["OP_ST0", "SZ_NA"],
    "ST1": ["OP_ST1", "SZ_NA"],
    "ST2": ["OP_ST2", "SZ_NA"],
    "ST3": ["OP_ST3", "SZ_NA"],
    "ST4": ["OP_ST4", "SZ_NA"],
    "ST5": ["OP_ST5", "SZ_NA"],
    "ST6": ["OP_ST6", "SZ_NA"],
    "ST7": ["OP_ST7", "SZ_NA"],
    "NONE": ["OP_NONE", "SZ_NA"],
    "ALr8b": ["OP_ALr8b", "SZ_NA"],
    "CLr9b": ["OP_CLr9b", "SZ_NA"],
    "DLr10b": ["OP_DLr10b", "SZ_NA"],
    "BLr11b": ["OP_BLr11b", "SZ_NA"],
    "AHr12b": ["OP_AHr12b", "SZ_NA"],
    "CHr13b": ["OP_CHr13b", "SZ_NA"],
    "DHr14b": ["OP_DHr14b", "SZ_NA"],
    "BHr15b": ["OP_BHr15b", "SZ_NA"],
    "rAXr8": ["OP_rAXr8", "SZ_NA"],
    "rCXr9": ["OP_rCXr9", "SZ_NA"],
    "rDXr10": ["OP_rDXr10", "SZ_NA"],
    "rBXr11": ["OP_rBXr11", "SZ_NA"],
    "rSPr12": ["OP_rSPr12", "SZ_NA"],
    "rBPr13": ["OP_rBPr13", "SZ_NA"],
    "rSIr14": ["OP_rSIr14", "SZ_NA"],
    "rDIr15": ["OP_rDIr15", "SZ_NA"],
    "jWP": ["OP_J", "SZ_WP"],
    "jDP": ["OP_J", "SZ_DP"],
}
173
174#
175# opcode prefix dictionary
176#
# Maps a prefix keyword (from the prefix part of an <opcode> definition or
# from its "mode" attribute) to the P_* flag name written into the table.
pfx_dict = {
    "aso": "P_aso",
    "oso": "P_oso",
    "rexw": "P_rexw",
    "rexb": "P_rexb",
    "rexx": "P_rexx",
    "rexr": "P_rexr",
    "inv64": "P_inv64",
    "def64": "P_def64",
    "depM": "P_depM",
    "cast1": "P_c1",
    "cast2": "P_c2",
    "cast3": "P_c3",
}
191
192
193#
194# globals
195#
# Generator state shared by the rest of the script.
opr_constants = []   # distinct operand-type constants found in operand_dict
siz_constants = []   # distinct operand-size constants found in operand_dict
tables = {}          # table name -> {slot index string -> entry dict}
table_sizes = {}     # table name -> number of slots in that table
mnm_list = []        # mnemonics in the order they are declared in the XML
default_opr = 'O_NONE, O_NONE, O_NONE'   # operand columns for rows without operands
202
203
204#
205# collect the operand/size constants
206#
# Record each distinct operand-type and operand-size constant once, in the
# order they are first met while walking operand_dict.
for opr_type, opr_size in operand_dict.values():
    if opr_type not in opr_constants:
        opr_constants.append(opr_type)
    if opr_size not in siz_constants:
        siz_constants.append(opr_size)
212
# The opcode table XML file is the script's only command-line argument;
# fail with a usage message instead of an IndexError when it is missing.
if len(sys.argv) < 2:
    print("usage: %s <optable.xml>" % sys.argv[0])
    sys.exit(-1)

xmlDoc = minidom.parse(sys.argv[1])

#
# look for top-level optable node
#
tlNode = xmlDoc.firstChild
while tlNode is not None and tlNode.localName != "x86optable":
    tlNode = tlNode.nextSibling

# Without this check a document lacking <x86optable> only fails later, when
# the instruction loop dereferences None.
if tlNode is None:
    print("error: no <x86optable> element found in %s" % sys.argv[1])
    sys.exit(-1)
220
221#
222# creates a table entry
223#
def centry(i, defmap):
    """Format one row of a generated instruction table.

    i is the slot label placed in the leading C comment. defmap is a table
    entry dict whose "type" key selects the row layout:

    - a type starting with "grp": the mnemonic column is "UD_I" plus the
      lower-cased type, the last column is the upper-cased "name" value,
      and the operand columns are default_opr;
    - "leaf": "name", "opr" and "pfx" fill the columns, with an empty value
      replaced by UD_Ina, default_opr and P_none respectively;
    - anything else: a UD_Iinvalid row.

    Returns the row as a newline-terminated string.
    """
    entry_type = defmap["type"]

    if entry_type[0:3] == "grp":
        mnm = 'UD_I' + entry_type.lower()
        opr = default_opr
        pfx = defmap["name"].upper()
    elif entry_type == "leaf":
        name = defmap["name"]
        # Test the bare name: once "UD_I" is prepended the string can never
        # be empty, so the UD_Ina fallback would be unreachable.
        mnm = ("UD_I" + name) if name else "UD_Ina"
        opr = defmap["opr"] or default_opr
        pfx = defmap["pfx"] or "P_none"
    else:
        mnm = "UD_Iinvalid"
        opr = default_opr
        pfx = "P_none"

    return " /* %s */ { %-16s %-26s %s },\n" % (i, mnm + ',', opr + ',', pfx)
242
243#
244# makes a new table and adds it to the global
245# list of tables
246#
def mktab(name, size):
    """Register table *name* with *size* slots unless it already exists.

    A new table starts empty in the global `tables` dict and its size is
    recorded in `table_sizes`. An existing table is left untouched, so the
    size given on the first call is the one that is kept.
    """
    if name not in tables:
        tables[name] = {}
        table_sizes[name] = size
251
# /MOD= selector values mapped to their slot in a grp_mod table.
_MOD_INDEX = {'!11': 0, '11': 1}


def _fail(msg):
    """Print a fatal table-definition error and stop the generator."""
    print(msg)
    sys.exit(-1)


def _descend(parent, index, kind):
    """Link slot *index* of table *parent* to a grp_<kind> sub-table.

    Stores the group entry in the global `tables` dict and returns the name
    of the sub-table. The caller is responsible for creating it with mktab.
    """
    child = "%s__op_%s__%s" % (parent, index, kind)
    tables[parent][index] = {'type': 'grp_' + kind, 'name': child}
    return child


#
# Walk every <instruction> element and place each of its opcode
# definitions into the decode tables.
#
for node in tlNode.childNodes:

    iclass = ''
    vendor = ''

    # we are only interested in <instruction>
    if node.localName != 'instruction':
        continue

    # we need the mnemonic attribute
    if not node.hasAttribute('mnemonic'):
        _fail("error: no mnemonic given in <instruction>.")

    # check if this instruction was already defined.
    # else add it to the global list of mnemonics
    mnemonic = node.getAttribute('mnemonic')
    if mnemonic in mnm_list:
        _fail("error: multiple declarations of mnemonic='%s'" % mnemonic)
    mnm_list.append(mnemonic)

    #
    # collect instruction
    #  - vendor
    #  - class
    #
    for n in node.childNodes:
        if n.localName == 'vendor':
            vendor = n.firstChild.data.strip()
        elif n.localName == 'class':
            iclass = n.firstChild.data

    #
    # for each opcode definition
    #
    for n in node.childNodes:
        if n.localName != 'opcode':
            continue

        opcode = n.firstChild.data.strip()
        parts = opcode.split(";")
        flags = []
        opr = []
        pfx = []
        pfx_c = []

        # get cast attribute, if given
        if n.hasAttribute('cast'):
            pfx_c.append("P_c" + n.getAttribute('cast'))

        # get implicit addressing attribute, if given
        if n.hasAttribute('imp_addr'):
            if int(n.getAttribute('imp_addr')):
                pfx_c.append("P_ImpAddr")

        # get mode attribute, if given
        if n.hasAttribute('mode'):
            for m in n.getAttribute('mode').split():
                if m not in pfx_dict:
                    print("warning: unrecognized mode attribute '%s'" % m)
                else:
                    pfx_c.append(pfx_dict[m])

        #
        # split opcode definition into
        #  1. prefixes (pfx)
        #  2. opcode bytes (opc)
        #  3. operands
        #
        if len(parts) == 1:
            opc = parts[0].split()
        elif len(parts) == 2:
            opc = parts[0].split()
            opr = parts[1].split()
            if any(o in pfx_dict for o in opc):
                # The first part holds prefixes, so the second part is the
                # opcode bytes and the definition has no operands. Leaving
                # the bytes in opr would make them be read as operands.
                pfx = opc
                opc = opr
                opr = []
        elif len(parts) == 3:
            pfx = parts[0].split()
            opc = parts[1].split()
            opr = parts[2].split()
        else:
            _fail("error: invalid opcode definition of %s\n" % mnemonic)

        # Convert opcodes to upper case
        opc = [o.upper() for o in opc]

        #
        # check for special cases of instruction translation
        # and ignore them
        #
        if mnemonic in ('pause', 'invalid', 'db') or \
           (mnemonic == 'nop' and opc[:1] == ['90']):
            continue

        #
        # Convert prefix
        #
        for p in pfx:
            if p not in pfx_dict:
                # Stop here: the lookup below would raise KeyError anyway.
                _fail("error: invalid prefix specification: %s \n" % pfx)
            pfx_c.append(pfx_dict[p])
        if len(pfx) == 0:
            pfx_c.append("P_none")
        pfx = "|".join(pfx_c)

        #
        # Convert operands
        #
        opr_c = ["O_NONE", "O_NONE", "O_NONE"]
        if len(opr) > len(opr_c):
            _fail("error: too many operands in opcode definition of %s\n" % mnemonic)
        for i, o in enumerate(opr):
            if o not in operand_dict:
                # An unknown operand would emit an undefined O_* macro into
                # itab.c, so treat it as fatal like the other errors.
                _fail("error: invalid operand declaration: %s\n" % o)
            opr_c[i] = "O_" + o
        opr = "%-8s %-8s %s" % (opr_c[0] + ",", opr_c[1] + ",", opr_c[2])

        table_sse = ''
        table_name = 'itab__1byte'
        table_size = 256
        table_index = ''

        #
        # Walk the opcode bytes and selectors, descending one table per
        # step. The order of the tests matters: '/MOD=' must be tried
        # before '/M', and the bare '/' (reg) case comes last.
        #
        for op in opc:
            if op.startswith('SSE'):
                table_sse = op
            elif op == '0F' and len(table_sse):
                table_name = "itab__pfx_%s__0f" % table_sse
                table_size = 256
                table_sse = ''
            elif op == '0F':
                table_name = "itab__0f"
                table_size = 256
            elif op == '38' and table_name in ("itab__0f", "itab__pfx_SSE66__0f"):
                table_index = '38'
                child = "%s__38" % table_name
                tables[table_name][table_index] = {
                    'type': 'grp_3byte',
                    'name': child,
                }
                table_name = child
                table_size = 256
            elif op.startswith('/X87='):
                table_name = _descend(table_name, table_index, 'x87')
                table_index = "%02X" % int(op[5:7], 16)
                table_size = 64
            elif op.startswith('/RM='):
                table_name = _descend(table_name, table_index, 'rm')
                table_index = "%02X" % int(op[4:6])
                table_size = 8
            elif op.startswith('/MOD='):
                table_name = _descend(table_name, table_index, 'mod')
                if len(op) == 8:
                    v = op[5:8]
                else:
                    v = op[5:7]
                table_index = "%02X" % _MOD_INDEX[v]
                table_size = 2
            elif op.startswith('/O'):
                table_name = _descend(table_name, table_index, 'osize')
                table_index = "%02X" % int(mode_dict[op[2:4]])
                table_size = 3
            elif op.startswith('/A'):
                table_name = _descend(table_name, table_index, 'asize')
                table_index = "%02X" % int(mode_dict[op[2:4]])
                table_size = 3
            elif op.startswith('/M'):
                table_name = _descend(table_name, table_index, 'mode')
                table_index = "%02X" % int(mode_dict[op[2:4]])
                table_size = 3
            elif op.startswith('/3DNOW'):
                table_name = "itab__3dnow"
                table_size = 256
            elif op.startswith('/'):
                table_name = _descend(table_name, table_index, 'reg')
                table_index = "%02X" % int(op[1:2])
                table_size = 8
            else:
                table_index = op

            # Make sure the table selected by this step exists before the
            # next step (or the leaf assignment below) stores into it.
            mktab(table_name, table_size)

        if len(vendor):
            if vendor not in vend_dict:
                _fail("error: unknown vendor '%s' for mnemonic='%s'" % (vendor, mnemonic))
            table_name = _descend(table_name, table_index, 'vendor')
            table_index = vend_dict[vendor]
            table_size = 3
            mktab(table_name, table_size)

        leaf = {
            'type': 'leaf',
            'name': mnemonic,
            'pfx': pfx,
            'opr': opr,
            'flags': flags,
        }
        tables[table_name][table_index] = leaf

        # A vendor-specific instruction is also stored in the ANY slot of
        # its vendor table.
        if len(vendor):
            tables[table_name][vend_dict['ANY']] = dict(leaf)
498
499# ---------------------------------------------------------------------
500# Generate itab.h
501# ---------------------------------------------------------------------
502
# Sort up front so the generated enums and macros come out in a stable
# order. table_names is reused when itab.c is written, where the table list
# must match the order of enum ud_itab_index.
siz_constants.sort()
table_names = sorted(tables.keys())
operands = sorted(operand_dict.keys())

# The with-block closes itab.h even if a write raises part-way through.
with open("itab.h", "w") as f:

    f.write('''
/* itab.h -- auto generated by opgen.py, do not edit. */

#ifndef UD_ITAB_H
#define UD_ITAB_H

''')

    # No size-constant enumeration is emitted; only a blank line is written.
    f.write("\n")

    f.write("\nenum ud_itab_vendor_index {\n")
    f.write(" ITAB__VENDOR_INDX__AMD,\n")
    f.write(" ITAB__VENDOR_INDX__INTEL,\n")
    f.write(" ITAB__VENDOR_INDX__ANY,\n")
    f.write("};\n\n")

    f.write("\nenum ud_itab_mode_index {\n")
    f.write(" ITAB__MODE_INDX__16,\n")
    f.write(" ITAB__MODE_INDX__32,\n")
    f.write(" ITAB__MODE_INDX__64\n")
    f.write("};\n\n")

    f.write("\nenum ud_itab_mod_index {\n")
    f.write(" ITAB__MOD_INDX__NOT_11,\n")
    f.write(" ITAB__MOD_INDX__11\n")
    f.write("};\n\n")

    #
    # Generate enumeration of the tables
    #
    f.write("\nenum ud_itab_index {\n")
    for name in table_names:
        f.write(" %s,\n" % name.upper())
    f.write("};\n\n")

    #
    # Generate mnemonics list: the real mnemonics first, then the special
    # internal types.
    #
    f.write("\nenum __attribute__((packed)) ud_mnemonic_code {\n")
    for m in mnm_list:
        f.write(" UD_I%s,\n" % m)
    for m in spl_mnm_types:
        f.write(" UD_I%s,\n" % m)
    f.write("};\n\n")

    #
    # Generate operand definitions
    #
    f.write("\n/* itab entry operand definitions */\n\n")
    for o in operands:
        f.write("#define O_%-7s { %-12s %-8s }\n" %
                (o, operand_dict[o][0] + ",", operand_dict[o][1]))
    f.write("\n")

    #
    # Generate struct defs. The original text ended the first declaration
    # with ";;", which puts a stray empty declaration at file scope.
    #
    f.write('''

extern const char* ud_mnemonics_str[];
extern struct ud_itab_entry* ud_itab_list[];

''')

    f.write("#endif\n")
586
587# ---------------------------------------------------------------------
588# Generate itab.c
589# ---------------------------------------------------------------------
590
# The with-block closes itab.c even if centry() raises on a malformed entry.
with open("itab.c", "w") as f:

    f.write('''
/* itab.c -- auto generated by opgen.py, do not edit. */

#include "types.h"
#include "decode.h"
#include "itab.h"

''')

    #
    # generate mnemonic list
    #
    f.write("const char * ud_mnemonics_str[] = {\n")
    for m in mnm_list:
        f.write(" \"%s\",\n" % m)
    f.write("};\n\n")

    #
    # generate instruction tables: one row per slot, with slots that were
    # never filled written as invalid entries
    #
    f.write("\n")
    for t in table_names:
        f.write("\nstatic struct ud_itab_entry %s[%d] = {\n" %
                (t.lower(), table_sizes[t]))
        for i in range(int(table_sizes[t])):
            index = "%02X" % i
            f.write(centry(index, tables[t].get(index, {"type": "invalid"})))
        f.write("};\n")

    #
    # write the instruction table list
    #
    f.write("\n/* the order of this table matches enum ud_itab_index */")
    f.write("\nstruct ud_itab_entry * ud_itab_list[] = {\n")
    for name in table_names:
        f.write(" %s,\n" % name.lower())
    f.write("};\n")
635
636# vim:expandtab
637# vim:sw=4
638# vim:ts=4
This page took 0.135983 seconds and 5 git commands to generate.