6 from xml.dom import minidom
9 # opgen.py -- generates tables and constants for decoding
16 # special mnemonic types for internal purposes.
18 spl_mnm_types = [ 'd3vil', \
33 # opcode-vendor dictionary
43 # opcode-mode dictionary
53 # opcode-operand dictionary
56 "Ap" : [ "OP_A" , "SZ_P" ],
57 "E" : [ "OP_E" , "SZ_NA" ],
58 "Eb" : [ "OP_E" , "SZ_B" ],
59 "Ew" : [ "OP_E" , "SZ_W" ],
60 "Ev" : [ "OP_E" , "SZ_V" ],
61 "Ed" : [ "OP_E" , "SZ_D" ],
62 "Eq" : [ "OP_E" , "SZ_Q" ],
63 "Ez" : [ "OP_E" , "SZ_Z" ],
64 "Ex" : [ "OP_E" , "SZ_MDQ" ],
65 "Ep" : [ "OP_E" , "SZ_P" ],
66 "G" : [ "OP_G" , "SZ_NA" ],
67 "Gb" : [ "OP_G" , "SZ_B" ],
68 "Gw" : [ "OP_G" , "SZ_W" ],
69 "Gv" : [ "OP_G" , "SZ_V" ],
70 "Gvw" : [ "OP_G" , "SZ_MDQ" ],
71 "Gd" : [ "OP_G" , "SZ_D" ],
72 "Gq" : [ "OP_G" , "SZ_Q" ],
73 "Gx" : [ "OP_G" , "SZ_MDQ" ],
74 "Gz" : [ "OP_G" , "SZ_Z" ],
75 "M" : [ "OP_M" , "SZ_NA" ],
76 "Mb" : [ "OP_M" , "SZ_B" ],
77 "Mw" : [ "OP_M" , "SZ_W" ],
78 "Ms" : [ "OP_M" , "SZ_W" ],
79 "Md" : [ "OP_M" , "SZ_D" ],
80 "Mq" : [ "OP_M" , "SZ_Q" ],
81 "Mt" : [ "OP_M" , "SZ_T" ],
82 "Mo" : [ "OP_M" , "SZ_O" ],
83 "I1" : [ "OP_I1" , "SZ_NA" ],
84 "I3" : [ "OP_I3" , "SZ_NA" ],
85 "Ib" : [ "OP_I" , "SZ_B" ],
86 "Isb" : [ "OP_I" , "SZ_SB" ],
87 "Iw" : [ "OP_I" , "SZ_W" ],
88 "Iv" : [ "OP_I" , "SZ_V" ],
89 "Iz" : [ "OP_I" , "SZ_Z" ],
90 "Jv" : [ "OP_J" , "SZ_V" ],
91 "Jz" : [ "OP_J" , "SZ_Z" ],
92 "Jb" : [ "OP_J" , "SZ_B" ],
93 "R" : [ "OP_R" , "SZ_RDQ" ],
94 "C" : [ "OP_C" , "SZ_NA" ],
95 "D" : [ "OP_D" , "SZ_NA" ],
96 "S" : [ "OP_S" , "SZ_NA" ],
97 "Ob" : [ "OP_O" , "SZ_B" ],
98 "Ow" : [ "OP_O" , "SZ_W" ],
99 "Ov" : [ "OP_O" , "SZ_V" ],
100 "V" : [ "OP_V" , "SZ_NA" ],
101 "W" : [ "OP_W" , "SZ_NA" ],
102 "P" : [ "OP_P" , "SZ_NA" ],
103 "Q" : [ "OP_Q" , "SZ_NA" ],
104 "VR" : [ "OP_VR" , "SZ_NA" ],
105 "PR" : [ "OP_PR" , "SZ_NA" ],
106 "AL" : [ "OP_AL" , "SZ_NA" ],
107 "CL" : [ "OP_CL" , "SZ_NA" ],
108 "DL" : [ "OP_DL" , "SZ_NA" ],
109 "BL" : [ "OP_BL" , "SZ_NA" ],
110 "AH" : [ "OP_AH" , "SZ_NA" ],
111 "CH" : [ "OP_CH" , "SZ_NA" ],
112 "DH" : [ "OP_DH" , "SZ_NA" ],
113 "BH" : [ "OP_BH" , "SZ_NA" ],
114 "AX" : [ "OP_AX" , "SZ_NA" ],
115 "CX" : [ "OP_CX" , "SZ_NA" ],
116 "DX" : [ "OP_DX" , "SZ_NA" ],
117 "BX" : [ "OP_BX" , "SZ_NA" ],
118 "SI" : [ "OP_SI" , "SZ_NA" ],
119 "DI" : [ "OP_DI" , "SZ_NA" ],
120 "SP" : [ "OP_SP" , "SZ_NA" ],
121 "BP" : [ "OP_BP" , "SZ_NA" ],
122 "eAX" : [ "OP_eAX" , "SZ_NA" ],
123 "eCX" : [ "OP_eCX" , "SZ_NA" ],
124 "eDX" : [ "OP_eDX" , "SZ_NA" ],
125 "eBX" : [ "OP_eBX" , "SZ_NA" ],
126 "eSI" : [ "OP_eSI" , "SZ_NA" ],
127 "eDI" : [ "OP_eDI" , "SZ_NA" ],
128 "eSP" : [ "OP_eSP" , "SZ_NA" ],
129 "eBP" : [ "OP_eBP" , "SZ_NA" ],
130 "rAX" : [ "OP_rAX" , "SZ_NA" ],
131 "rCX" : [ "OP_rCX" , "SZ_NA" ],
132 "rBX" : [ "OP_rBX" , "SZ_NA" ],
133 "rDX" : [ "OP_rDX" , "SZ_NA" ],
134 "rSI" : [ "OP_rSI" , "SZ_NA" ],
135 "rDI" : [ "OP_rDI" , "SZ_NA" ],
136 "rSP" : [ "OP_rSP" , "SZ_NA" ],
137 "rBP" : [ "OP_rBP" , "SZ_NA" ],
138 "ES" : [ "OP_ES" , "SZ_NA" ],
139 "CS" : [ "OP_CS" , "SZ_NA" ],
140 "DS" : [ "OP_DS" , "SZ_NA" ],
141 "SS" : [ "OP_SS" , "SZ_NA" ],
142 "GS" : [ "OP_GS" , "SZ_NA" ],
143 "FS" : [ "OP_FS" , "SZ_NA" ],
144 "ST0" : [ "OP_ST0" , "SZ_NA" ],
145 "ST1" : [ "OP_ST1" , "SZ_NA" ],
146 "ST2" : [ "OP_ST2" , "SZ_NA" ],
147 "ST3" : [ "OP_ST3" , "SZ_NA" ],
148 "ST4" : [ "OP_ST4" , "SZ_NA" ],
149 "ST5" : [ "OP_ST5" , "SZ_NA" ],
150 "ST6" : [ "OP_ST6" , "SZ_NA" ],
151 "ST7" : [ "OP_ST7" , "SZ_NA" ],
152 "NONE" : [ "OP_NONE" , "SZ_NA" ],
153 "ALr8b" : [ "OP_ALr8b" , "SZ_NA" ],
154 "CLr9b" : [ "OP_CLr9b" , "SZ_NA" ],
155 "DLr10b" : [ "OP_DLr10b" , "SZ_NA" ],
156 "BLr11b" : [ "OP_BLr11b" , "SZ_NA" ],
157 "AHr12b" : [ "OP_AHr12b" , "SZ_NA" ],
158 "CHr13b" : [ "OP_CHr13b" , "SZ_NA" ],
159 "DHr14b" : [ "OP_DHr14b" , "SZ_NA" ],
160 "BHr15b" : [ "OP_BHr15b" , "SZ_NA" ],
161 "rAXr8" : [ "OP_rAXr8" , "SZ_NA" ],
162 "rCXr9" : [ "OP_rCXr9" , "SZ_NA" ],
163 "rDXr10" : [ "OP_rDXr10" , "SZ_NA" ],
164 "rBXr11" : [ "OP_rBXr11" , "SZ_NA" ],
165 "rSPr12" : [ "OP_rSPr12" , "SZ_NA" ],
166 "rBPr13" : [ "OP_rBPr13" , "SZ_NA" ],
167 "rSIr14" : [ "OP_rSIr14" , "SZ_NA" ],
168 "rDIr15" : [ "OP_rDIr15" , "SZ_NA" ],
169 "jWP" : [ "OP_J" , "SZ_WP" ],
170 "jDP" : [ "OP_J" , "SZ_DP" ],
175 # opcode prefix dictionary
201 default_opr = 'O_NONE, O_NONE, O_NONE'
205 # collect the operand/size constants
# Gather the distinct operand-type (first element) and operand-size
# (second element) constants named by the operand_dict entries, keeping
# the order in which each constant is first encountered.
for o in operand_dict:
    entry = operand_dict[o]
    if entry[0] not in opr_constants:
        opr_constants.append(entry[0])
    if entry[1] not in siz_constants:
        siz_constants.append(entry[1])
# Parse the XML file named by the first command-line argument.
xmlDoc = minidom.parse(sys.argv[1])

# Step through the document's top-level siblings until the <x86optable>
# node is reached; the loop also stops once tlNode becomes falsy.
tlNode = xmlDoc.firstChild
while tlNode and tlNode.localName != "x86optable":
    tlNode = tlNode.nextSibling
222 # creates a table entry
224 def centry(i, defmap):
225 if defmap["type"][0:3] == "grp":
227 mnm = 'UD_I' + defmap["type"].lower()
228 pfx = defmap["name"].upper()
229 elif defmap["type"] == "leaf":
230 mnm = "UD_I" + defmap["name"]
233 if len(mnm) == 0: mnm = "UD_Ina"
234 if len(opr) == 0: opr = default_opr
235 if len(pfx) == 0: pfx = "P_none"
241 return " /* %s */ { %-16s %-26s %s },\n" % (i, mnm + ',', opr + ',', pfx)
244 # makes a new table and adds it to the global
247 def mktab(name, size):
248 if not (name in tables.keys()):
250 table_sizes[name] = size
252 for node in tlNode.childNodes:
258 # we are only interested in <instruction>
259 if node.localName != 'instruction':
262 # we need the mnemonic attribute
263 if not ('mnemonic' in node.attributes.keys()):
264 print "error: no mnemonic given in <instruction>."
267 # check if this instruction was already defined.
268 # else add it to the global list of mnemonics
269 mnemonic = node.attributes['mnemonic'].value
270 if mnemonic in mnm_list:
271 print "error: multiple declarations of mnemonic='%s'" % mnemonic;
274 mnm_list.append(mnemonic)
277 # collect instruction
281 for n in node.childNodes:
282 if n.localName == 'vendor':
283 vendor = (n.firstChild.data).strip();
284 elif n.localName == 'class':
285 iclass = n.firstChild.data;
288 # for each opcode definition
290 for n in node.childNodes:
291 if n.localName != 'opcode':
294 opcode = n.firstChild.data.strip();
295 parts = opcode.split(";");
302 # get cast attribute, if given
303 if 'cast' in n.attributes.keys():
304 pfx_c.append( "P_c" + n.attributes['cast'].value )
306 # get implicit addressing attribute, if given
307 if 'imp_addr' in n.attributes.keys():
308 if int( n.attributes['imp_addr'].value ):
309 pfx_c.append( "P_ImpAddr" )
311 # get mode attribute, if given
312 if 'mode' in n.attributes.keys():
313 v = (n.attributes['mode'].value).strip()
316 if not (m in pfx_dict):
317 print "warning: unrecognized mode attribute '%s'" % m
319 pfx_c.append(pfx_dict[m])
322 # split opcode definition into
324 # 2. opcode bytes (opc)
328 opc = parts[0].split()
329 elif len(parts) == 2:
330 opc = parts[0].split()
331 opr = parts[1].split()
334 pfx = parts[0].split()
335 opc = parts[1].split()
337 elif len(parts) == 3:
338 pfx = parts[0].split()
339 opc = parts[1].split()
340 opr = parts[2].split()
342 print "error: invalid opcode definition of %s\n" % mnemonic
344 # Convert opcodes to upper case
# Normalise every opcode token to upper case, updating the list in place.
for i, token in enumerate(opc):
    opc[i] = token.upper()
349 # check for special cases of instruction translation
352 if mnemonic == 'pause' or \
353 ( mnemonic == 'nop' and opc[0] == '90' ) or \
354 mnemonic == 'invalid' or \
362 if not ( p in pfx_dict.keys() ):
363 print "error: invalid prefix specification: %s \n" % pfx
364 pfx_c.append( pfx_dict[p] )
366 pfx_c.append( "P_none" )
367 pfx = "|".join( pfx_c )
372 opr_c = [ "O_NONE", "O_NONE", "O_NONE" ]
373 for i in range(len(opr)):
374 if not (opr[i] in operand_dict.keys()):
375 print "error: invalid operand declaration: %s\n" % opr[i]
376 opr_c[i] = "O_" + opr[i]
377 opr = "%-8s %-8s %s" % (opr_c[0] + ",", opr_c[1] + ",", opr_c[2])
380 table_name = 'itab__1byte'
387 elif op == '0F' and len(table_sse):
388 table_name = "itab__pfx_%s__0f" % table_sse
392 table_name = "itab__0f"
394 elif op == '38' and (table_name == "itab__0f" or
395 table_name == "itab__pfx_SSE66__0f"):
397 tables[table_name][table_index] = { \
398 'type' : 'grp_3byte', \
399 'name' : "%s__38" % (table_name) \
401 table_name = tables[table_name][table_index]['name']
403 elif op[0:5] == '/X87=':
404 tables[table_name][table_index] = { \
405 'type' : 'grp_x87', \
406 'name' : "%s__op_%s__x87" % (table_name, table_index) \
408 table_name = tables[table_name][table_index]['name']
409 table_index = "%02X" % int(op[5:7], 16)
411 elif op[0:4] == '/RM=':
412 tables[table_name][table_index] = { \
414 'name' : "%s__op_%s__rm" % (table_name, table_index) \
416 table_name = tables[table_name][table_index]['name']
417 table_index = "%02X" % int(op[4:6])
419 elif op[0:5] == '/MOD=':
420 tables[table_name][table_index] = { \
421 'type' : 'grp_mod', \
422 'name' : "%s__op_%s__mod" % (table_name, table_index) \
424 table_name = tables[table_name][table_index]['name']
429 mod_dict = { '!11' : 0, '11' : 1 }
430 table_index = "%02X" % int(mod_dict[v])
432 elif op[0:2] == '/O':
433 tables[table_name][table_index] = { \
434 'type' : 'grp_osize', \
435 'name' : "%s__op_%s__osize" % (table_name, table_index) \
437 table_name = tables[table_name][table_index]['name']
438 table_index = "%02X" % int(mode_dict[op[2:4]])
440 elif op[0:2] == '/A':
441 tables[table_name][table_index] = { \
442 'type' : 'grp_asize', \
443 'name' : "%s__op_%s__asize" % (table_name, table_index) \
445 table_name = tables[table_name][table_index]['name']
446 table_index = "%02X" % int(mode_dict[op[2:4]])
448 elif op[0:2] == '/M':
449 tables[table_name][table_index] = { \
450 'type' : 'grp_mode', \
451 'name' : "%s__op_%s__mode" % (table_name, table_index) \
453 table_name = tables[table_name][table_index]['name']
454 table_index = "%02X" % int(mode_dict[op[2:4]])
456 elif op[0:6] == '/3DNOW':
457 table_name = "itab__3dnow"
460 tables[table_name][table_index] = { \
461 'type' : 'grp_reg', \
462 'name' : "%s__op_%s__reg" % (table_name, table_index) \
464 table_name = tables[table_name][table_index]['name']
465 table_index = "%02X" % int(op[1:2])
470 mktab(table_name, table_size)
473 tables[table_name][table_index] = { \
474 'type' : 'grp_vendor', \
475 'name' : "%s__op_%s__vendor" % (table_name, table_index) \
477 table_name = tables[table_name][table_index]['name']
478 table_index = vend_dict[vendor]
480 mktab(table_name, table_size)
482 tables[table_name][table_index] = { \
491 tables[table_name][vend_dict['ANY']] = { \
499 # ---------------------------------------------------------------------
501 # ---------------------------------------------------------------------
503 f = open("itab.h", "w")
506 /* itab.h -- auto generated by opgen.py, do not edit. */
514 # Generate enumeration of size constants
520 f.write("\nenum ud_itab_vendor_index {\n" )
521 f.write(" ITAB__VENDOR_INDX__AMD,\n" )
522 f.write(" ITAB__VENDOR_INDX__INTEL,\n" )
523 f.write(" ITAB__VENDOR_INDX__ANY,\n" )
527 f.write("\nenum ud_itab_mode_index {\n" )
528 f.write(" ITAB__MODE_INDX__16,\n" )
529 f.write(" ITAB__MODE_INDX__32,\n" )
530 f.write(" ITAB__MODE_INDX__64\n" )
534 f.write("\nenum ud_itab_mod_index {\n" )
535 f.write(" ITAB__MOD_INDX__NOT_11,\n" )
536 f.write(" ITAB__MOD_INDX__11\n" )
540 # Generate enumeration of the tables
542 table_names = tables.keys()
545 f.write( "\nenum ud_itab_index {\n" )
546 for name in table_names:
547 f.write(" %s,\n" % name.upper() );
551 # Generate mnemonics list
553 f.write("\nenum __attribute__((packed)) ud_mnemonic_code {\n")
555 f.write(" UD_I%s,\n" % m)
556 for m in spl_mnm_types:
557 f.write(" UD_I%s,\n" % m)
561 # Generate operand definitions
563 f.write("\n/* itab entry operand definitions */\n\n");
564 operands = operand_dict.keys()
567 f.write("#define O_%-7s { %-12s %-8s }\n" %
568 (o, operand_dict[o][0] + ",", operand_dict[o][1]));
572 # Generate struct defs
577 extern const char* ud_mnemonics_str[];;
578 extern struct ud_itab_entry* ud_itab_list[];
587 # ---------------------------------------------------------------------
589 # ---------------------------------------------------------------------
591 f = open("itab.c", "w")
594 /* itab.c -- auto generated by opgen.py, do not edit. */
603 # generate mnemonic list
605 f.write("const char * ud_mnemonics_str[] = {\n")
607 f.write(" \"%s\",\n" % m )
611 # generate instruction tables
615 for t in table_names:
616 f.write("\nstatic struct ud_itab_entry " + t.lower() + "[%d] = {\n" % table_sizes[t]);
617 for i in range(int(table_sizes[t])):
619 if index in tables[t]:
620 f.write(centry(index, tables[t][index]))
622 f.write(centry(index,{"type":"invalid"}))
626 # write the instruction table list
628 f.write( "\n/* the order of this table matches enum ud_itab_index */")
629 f.write( "\nstruct ud_itab_entry * ud_itab_list[] = {\n" )
630 for name in table_names:
631 f.write( " %s,\n" % name.lower() )