#!/usr/bin/env python

import os
import sys
import string
from xml.dom import minidom

#
# opgen.py -- generates tables and constants for decoding
#
# - itab.c
# - itab.h
#
# usage: opgen.py <optable.xml>
#
# Both output files are written to the current working directory.
#

#
# special mnemonic types for internal purposes.
#
spl_mnm_types = [
    'd3vil',
    'na',
    'grp_reg',
    'grp_rm',
    'grp_vendor',
    'grp_x87',
    'grp_mode',
    'grp_osize',
    'grp_asize',
    'grp_mod',
    'grp_3byte',
    'none',
]

#
# opcode-vendor dictionary
#
vend_dict = {
    'AMD': '00',
    'INTEL': '01',
    'ANY': '02',
}

#
# opcode-mode dictionary
#
mode_dict = {
    '16': '00',
    '32': '01',
    '64': '02',
}

#
# /MOD= selector dictionary (index into a two-entry grp_mod table)
#
mod_dict = {
    '!11': 0,
    '11': 1,
}

#
# opcode-operand dictionary
#
operand_dict = {
    "Ap": ["OP_A", "SZ_P"],
    "E": ["OP_E", "SZ_NA"],
    "Eb": ["OP_E", "SZ_B"],
    "Ew": ["OP_E", "SZ_W"],
    "Ev": ["OP_E", "SZ_V"],
    "Ed": ["OP_E", "SZ_D"],
    "Eq": ["OP_E", "SZ_Q"],
    "Ez": ["OP_E", "SZ_Z"],
    "Ex": ["OP_E", "SZ_MDQ"],
    "Ep": ["OP_E", "SZ_P"],
    "G": ["OP_G", "SZ_NA"],
    "Gb": ["OP_G", "SZ_B"],
    "Gw": ["OP_G", "SZ_W"],
    "Gv": ["OP_G", "SZ_V"],
    "Gvw": ["OP_G", "SZ_MDQ"],
    "Gd": ["OP_G", "SZ_D"],
    "Gq": ["OP_G", "SZ_Q"],
    "Gx": ["OP_G", "SZ_MDQ"],
    "Gz": ["OP_G", "SZ_Z"],
    "M": ["OP_M", "SZ_NA"],
    "Mb": ["OP_M", "SZ_B"],
    "Mw": ["OP_M", "SZ_W"],
    "Ms": ["OP_M", "SZ_W"],
    "Md": ["OP_M", "SZ_D"],
    "Mq": ["OP_M", "SZ_Q"],
    "Mt": ["OP_M", "SZ_T"],
    "Mo": ["OP_M", "SZ_O"],
    "I1": ["OP_I1", "SZ_NA"],
    "I3": ["OP_I3", "SZ_NA"],
    "Ib": ["OP_I", "SZ_B"],
    "Isb": ["OP_I", "SZ_SB"],
    "Iw": ["OP_I", "SZ_W"],
    "Iv": ["OP_I", "SZ_V"],
    "Iz": ["OP_I", "SZ_Z"],
    "Jv": ["OP_J", "SZ_V"],
    "Jz": ["OP_J", "SZ_Z"],
    "Jb": ["OP_J", "SZ_B"],
    "R": ["OP_R", "SZ_RDQ"],
    "C": ["OP_C", "SZ_NA"],
    "D": ["OP_D", "SZ_NA"],
    "S": ["OP_S", "SZ_NA"],
    "Ob": ["OP_O", "SZ_B"],
    "Ow": ["OP_O", "SZ_W"],
    "Ov": ["OP_O", "SZ_V"],
    "V": ["OP_V", "SZ_NA"],
    "W": ["OP_W", "SZ_NA"],
    "P": ["OP_P", "SZ_NA"],
    "Q": ["OP_Q", "SZ_NA"],
    "VR": ["OP_VR", "SZ_NA"],
    "PR": ["OP_PR", "SZ_NA"],
    "AL": ["OP_AL", "SZ_NA"],
    "CL": ["OP_CL", "SZ_NA"],
    "DL": ["OP_DL", "SZ_NA"],
    "BL": ["OP_BL", "SZ_NA"],
    "AH": ["OP_AH", "SZ_NA"],
    "CH": ["OP_CH", "SZ_NA"],
    "DH": ["OP_DH", "SZ_NA"],
    "BH": ["OP_BH", "SZ_NA"],
    "AX": ["OP_AX", "SZ_NA"],
    "CX": ["OP_CX", "SZ_NA"],
    "DX": ["OP_DX", "SZ_NA"],
    "BX": ["OP_BX", "SZ_NA"],
    "SI": ["OP_SI", "SZ_NA"],
    "DI": ["OP_DI", "SZ_NA"],
    "SP": ["OP_SP", "SZ_NA"],
    "BP": ["OP_BP", "SZ_NA"],
    "eAX": ["OP_eAX", "SZ_NA"],
    "eCX": ["OP_eCX", "SZ_NA"],
    "eDX": ["OP_eDX", "SZ_NA"],
    "eBX": ["OP_eBX", "SZ_NA"],
    "eSI": ["OP_eSI", "SZ_NA"],
    "eDI": ["OP_eDI", "SZ_NA"],
    "eSP": ["OP_eSP", "SZ_NA"],
    "eBP": ["OP_eBP", "SZ_NA"],
    "rAX": ["OP_rAX", "SZ_NA"],
    "rCX": ["OP_rCX", "SZ_NA"],
    "rBX": ["OP_rBX", "SZ_NA"],
    "rDX": ["OP_rDX", "SZ_NA"],
    "rSI": ["OP_rSI", "SZ_NA"],
    "rDI": ["OP_rDI", "SZ_NA"],
    "rSP": ["OP_rSP", "SZ_NA"],
    "rBP": ["OP_rBP", "SZ_NA"],
    "ES": ["OP_ES", "SZ_NA"],
    "CS": ["OP_CS", "SZ_NA"],
    "DS": ["OP_DS", "SZ_NA"],
    "SS": ["OP_SS", "SZ_NA"],
    "GS": ["OP_GS", "SZ_NA"],
    "FS": ["OP_FS", "SZ_NA"],
    "ST0": ["OP_ST0", "SZ_NA"],
    "ST1": ["OP_ST1", "SZ_NA"],
    "ST2": ["OP_ST2", "SZ_NA"],
    "ST3": ["OP_ST3", "SZ_NA"],
    "ST4": ["OP_ST4", "SZ_NA"],
    "ST5": ["OP_ST5", "SZ_NA"],
    "ST6": ["OP_ST6", "SZ_NA"],
    "ST7": ["OP_ST7", "SZ_NA"],
    "NONE": ["OP_NONE", "SZ_NA"],
    "ALr8b": ["OP_ALr8b", "SZ_NA"],
    "CLr9b": ["OP_CLr9b", "SZ_NA"],
    "DLr10b": ["OP_DLr10b", "SZ_NA"],
    "BLr11b": ["OP_BLr11b", "SZ_NA"],
    "AHr12b": ["OP_AHr12b", "SZ_NA"],
    "CHr13b": ["OP_CHr13b", "SZ_NA"],
    "DHr14b": ["OP_DHr14b", "SZ_NA"],
    "BHr15b": ["OP_BHr15b", "SZ_NA"],
    "rAXr8": ["OP_rAXr8", "SZ_NA"],
    "rCXr9": ["OP_rCXr9", "SZ_NA"],
    "rDXr10": ["OP_rDXr10", "SZ_NA"],
    "rBXr11": ["OP_rBXr11", "SZ_NA"],
    "rSPr12": ["OP_rSPr12", "SZ_NA"],
    "rBPr13": ["OP_rBPr13", "SZ_NA"],
    "rSIr14": ["OP_rSIr14", "SZ_NA"],
    "rDIr15": ["OP_rDIr15", "SZ_NA"],
    "jWP": ["OP_J", "SZ_WP"],
    "jDP": ["OP_J", "SZ_DP"],
}

#
# opcode prefix dictionary
#
pfx_dict = {
    "aso": "P_aso",
    "oso": "P_oso",
    "rexw": "P_rexw",
    "rexb": "P_rexb",
    "rexx": "P_rexx",
    "rexr": "P_rexr",
    "inv64": "P_inv64",
    "def64": "P_def64",
    "depM": "P_depM",
    "cast1": "P_c1",
    "cast2": "P_c2",
    "cast3": "P_c3",
}

#
# globals
#
opr_constants = []
siz_constants = []
tables = {}
table_sizes = {}
mnm_list = []
default_opr = 'O_NONE, O_NONE, O_NONE'

#
# collect the operand/size constants
#
for o in operand_dict.keys():
    if operand_dict[o][0] not in opr_constants:
        opr_constants.append(operand_dict[o][0])
    if operand_dict[o][1] not in siz_constants:
        siz_constants.append(operand_dict[o][1])


def _fail(message):
    """Print *message* and abort generation with exit status -1."""
    print(message)
    sys.exit(-1)


#
# creates a table entry
#
def centry(i, defmap):
    """Return the C initializer line for slot *i* of an instruction table.

    *defmap* is a table entry dictionary.  A "type" starting with "grp"
    yields a link entry (mnemonic derived from the type, prefix field set to
    the upper-cased name of the linked table); "leaf" yields an instruction
    entry built from its "name", "opr" and "pfx" values; any other type
    yields the invalid-instruction entry.
    """
    if defmap["type"][0:3] == "grp":
        opr = default_opr
        mnm = 'UD_I' + defmap["type"].lower()
        pfx = defmap["name"].upper()
    elif defmap["type"] == "leaf":
        mnm = "UD_I" + defmap["name"]
        opr = defmap["opr"] or default_opr
        pfx = defmap["pfx"] or "P_none"
    else:
        opr = default_opr
        pfx = "P_none"
        mnm = "UD_Iinvalid"
    return " /* %s */ { %-16s %-26s %s },\n" % (i, mnm + ',', opr + ',', pfx)


#
# makes a new table and adds it to the global
# list of tables
#
def mktab(name, size):
    """Register table *name* with *size* slots unless it already exists."""
    if name not in tables:
        tables[name] = {}
        table_sizes[name] = size


def _link_group(table_name, table_index, kind):
    """Store a 'grp_<kind>' link at tables[table_name][table_index].

    Returns the name of the sub-table the link points to; the caller is
    responsible for registering that table with mktab().
    """
    sub_name = "%s__op_%s__%s" % (table_name, table_index, kind)
    tables[table_name][table_index] = {
        'type': 'grp_' + kind,
        'name': sub_name,
    }
    return sub_name


def _load_optable(path):
    """Parse the XML file at *path* and return its top-level optable node."""
    node = minidom.parse(path).firstChild
    #
    # look for top-level optable node
    #
    while node is not None and node.localName != "x86optable":
        node = node.nextSibling
    if node is None:
        _fail("error: no x86optable node found in %s" % path)
    return node


def _split_opcode_def(opcode, mnemonic):
    """Split an opcode definition into (pfx, opc, opr) token lists.

    The definition holds up to three ';'-separated fields:
      1. prefixes (pfx)
      2. opcode bytes (opc)
      3. operands (opr)
    With two fields, the first is taken as prefixes when any of its tokens
    is a known prefix, otherwise as opcode bytes followed by operands.
    Opcode tokens are returned upper-cased.
    """
    parts = opcode.split(";")
    pfx = []
    opr = []
    if len(parts) == 1:
        opc = parts[0].split()
    elif len(parts) == 2:
        opc = parts[0].split()
        opr = parts[1].split()
        if any(o in pfx_dict for o in opc):
            # "prefixes; opcode bytes": there is no operand field, so the
            # second field must not be interpreted as operands as well.
            pfx = opc
            opc = opr
            opr = []
    elif len(parts) == 3:
        pfx = parts[0].split()
        opc = parts[1].split()
        opr = parts[2].split()
    else:
        _fail("error: invalid opcode definition of %s\n" % mnemonic)
    return pfx, [o.upper() for o in opc], opr


def _locate_table(opc):
    """Walk the opcode tokens and return the (table_name, table_index) slot.

    Every table visited on the way is registered with mktab(), and group
    link entries are stored in the parent tables.  The order of the tests
    matters: '/MOD=' must be checked before '/M' and '/3DNOW' before the
    generic '/<digit>' selector.
    """
    table_sse = ''
    table_name = 'itab__1byte'
    table_size = 256
    table_index = ''

    for op in opc:
        if op[0:3] == 'SSE':
            table_sse = op
        elif op == '0F' and table_sse:
            table_name = "itab__pfx_%s__0f" % table_sse
            table_size = 256
            table_sse = ''
        elif op == '0F':
            table_name = "itab__0f"
            table_size = 256
        elif op == '38' and table_name in ("itab__0f", "itab__pfx_SSE66__0f"):
            table_index = '38'
            tables[table_name][table_index] = {
                'type': 'grp_3byte',
                'name': "%s__38" % (table_name),
            }
            table_name = tables[table_name][table_index]['name']
            table_size = 256
        elif op[0:5] == '/X87=':
            table_name = _link_group(table_name, table_index, 'x87')
            table_index = "%02X" % int(op[5:7], 16)
            table_size = 64
        elif op[0:4] == '/RM=':
            table_name = _link_group(table_name, table_index, 'rm')
            table_index = "%02X" % int(op[4:6])
            table_size = 8
        elif op[0:5] == '/MOD=':
            table_name = _link_group(table_name, table_index, 'mod')
            if len(op) == 8:
                v = op[5:8]
            else:
                v = op[5:7]
            table_index = "%02X" % int(mod_dict[v])
            table_size = 2
        elif op[0:2] == '/O':
            table_name = _link_group(table_name, table_index, 'osize')
            table_index = "%02X" % int(mode_dict[op[2:4]])
            table_size = 3
        elif op[0:2] == '/A':
            table_name = _link_group(table_name, table_index, 'asize')
            table_index = "%02X" % int(mode_dict[op[2:4]])
            table_size = 3
        elif op[0:2] == '/M':
            table_name = _link_group(table_name, table_index, 'mode')
            table_index = "%02X" % int(mode_dict[op[2:4]])
            table_size = 3
        elif op[0:6] == '/3DNOW':
            table_name = "itab__3dnow"
            table_size = 256
        elif op[0:1] == '/':
            table_name = _link_group(table_name, table_index, 'reg')
            table_index = "%02X" % int(op[1:2])
            table_size = 8
        else:
            table_index = op

        mktab(table_name, table_size)

    return table_name, table_index


def _add_opcode(n, mnemonic, vendor):
    """Add the leaf entry described by <opcode> node *n* to the tables."""
    opcode = n.firstChild.data.strip()
    flags = []
    pfx_c = []

    # get cast attribute, if given
    if n.hasAttribute('cast'):
        pfx_c.append("P_c" + n.getAttribute('cast'))

    # get implicit addressing attribute, if given
    if n.hasAttribute('imp_addr'):
        if int(n.getAttribute('imp_addr')):
            pfx_c.append("P_ImpAddr")

    # get mode attribute, if given
    if n.hasAttribute('mode'):
        for m in n.getAttribute('mode').strip().split():
            if m not in pfx_dict:
                print("warning: unrecognized mode attribute '%s'" % m)
            else:
                pfx_c.append(pfx_dict[m])

    pfx, opc, opr = _split_opcode_def(opcode, mnemonic)

    #
    # check for special cases of instruction translation
    # and ignore them
    #
    if mnemonic in ('pause', 'invalid', 'db') or \
       (mnemonic == 'nop' and opc[:1] == ['90']):
        return

    if not opc:
        _fail("error: invalid opcode definition of %s\n" % mnemonic)

    #
    # Convert prefix
    #
    for p in pfx:
        if p not in pfx_dict:
            # Abort here: there is no constant to emit for an unknown prefix.
            _fail("error: invalid prefix specification: %s \n" % p)
        pfx_c.append(pfx_dict[p])
    if len(pfx) == 0:
        pfx_c.append("P_none")
    pfx = "|".join(pfx_c)

    #
    # Convert operands
    #
    opr_c = ["O_NONE", "O_NONE", "O_NONE"]
    if len(opr) > len(opr_c):
        _fail("error: too many operands in opcode definition of %s\n"
              % mnemonic)
    for i, operand in enumerate(opr):
        if operand not in operand_dict:
            # Reported but not fatal: the entry is still emitted.
            print("error: invalid operand declaration: %s\n" % operand)
        opr_c[i] = "O_" + operand
    opr = "%-8s %-8s %s" % (opr_c[0] + ",", opr_c[1] + ",", opr_c[2])

    table_name, table_index = _locate_table(opc)

    if len(vendor):
        if vendor not in vend_dict:
            _fail("error: invalid vendor '%s' for mnemonic='%s'"
                  % (vendor, mnemonic))
        table_name = _link_group(table_name, table_index, 'vendor')
        table_index = vend_dict[vendor]
        mktab(table_name, 3)

    leaf = {
        'type': 'leaf',
        'name': mnemonic,
        'pfx': pfx,
        'opr': opr,
        'flags': flags,
    }
    tables[table_name][table_index] = leaf

    if len(vendor):
        # A vendor-specific instruction also fills the ANY slot.
        tables[table_name][vend_dict['ANY']] = dict(leaf)


def _add_instruction(node):
    """Record the mnemonic of <instruction> *node* and add its opcodes."""
    # we need the mnemonic attribute
    if not node.hasAttribute('mnemonic'):
        _fail("error: no mnemonic given in <instruction>.")

    # check if this instruction was already defined.
    # else add it to the global list of mnemonics
    mnemonic = node.getAttribute('mnemonic')
    if mnemonic in mnm_list:
        _fail("error: multiple declarations of mnemonic='%s'" % mnemonic)
    mnm_list.append(mnemonic)

    #
    # collect instruction
    # - vendor
    #
    vendor = ''
    for n in node.childNodes:
        if n.localName == 'vendor':
            vendor = n.firstChild.data.strip()

    #
    # for each opcode definition
    #
    for n in node.childNodes:
        if n.localName == 'opcode':
            _add_opcode(n, mnemonic, vendor)


def _build_tables(tlNode):
    """Populate the global tables from the children of the optable node."""
    for node in tlNode.childNodes:
        # we are only interested in <instruction>
        if node.localName != 'instruction':
            continue
        _add_instruction(node)


# ---------------------------------------------------------------------
# Generate itab.h
# ---------------------------------------------------------------------
def _write_header(table_names):
    """Write itab.h: index enums, mnemonic enum and operand macros.

    NOTE(review): the line breaks inside the multi-line templates were
    reconstructed; confirm against a previously generated itab.h.
    """
    with open("itab.h", "w") as f:
        f.write("\n/* itab.h -- auto generated by opgen.py, do not edit. */\n"
                "\n#ifndef UD_ITAB_H\n#define UD_ITAB_H\n\n")

        #
        # Generate enumeration of size constants
        #
        siz_constants.sort()
        f.write("\n")

        f.write("\nenum ud_itab_vendor_index {\n")
        f.write(" ITAB__VENDOR_INDX__AMD,\n")
        f.write(" ITAB__VENDOR_INDX__INTEL,\n")
        f.write(" ITAB__VENDOR_INDX__ANY,\n")
        f.write("};\n\n")

        f.write("\nenum ud_itab_mode_index {\n")
        f.write(" ITAB__MODE_INDX__16,\n")
        f.write(" ITAB__MODE_INDX__32,\n")
        f.write(" ITAB__MODE_INDX__64\n")
        f.write("};\n\n")

        f.write("\nenum ud_itab_mod_index {\n")
        f.write(" ITAB__MOD_INDX__NOT_11,\n")
        f.write(" ITAB__MOD_INDX__11\n")
        f.write("};\n\n")

        #
        # Generate enumeration of the tables
        #
        f.write("\nenum ud_itab_index {\n")
        for name in table_names:
            f.write(" %s,\n" % name.upper())
        f.write("};\n\n")

        #
        # Generate mnemonics list
        #
        f.write("\nenum __attribute__((packed)) ud_mnemonic_code {\n")
        for m in mnm_list:
            f.write(" UD_I%s,\n" % m)
        for m in spl_mnm_types:
            f.write(" UD_I%s,\n" % m)
        f.write("};\n\n")

        #
        # Generate operand definitions
        #
        f.write("\n/* itab entry operand definitions */\n\n")
        for o in sorted(operand_dict):
            f.write("#define O_%-7s { %-12s %-8s }\n"
                    % (o, operand_dict[o][0] + ",", operand_dict[o][1]))
        f.write("\n")

        #
        # Generate struct defs
        #
        f.write("\nextern const char* ud_mnemonics_str[];\n"
                "extern struct ud_itab_entry* ud_itab_list[];\n\n")

        f.write("#endif\n")


# ---------------------------------------------------------------------
# Generate itab.c
# ---------------------------------------------------------------------
def _write_source(table_names):
    """Write itab.c: mnemonic strings, every table and the table list.

    *table_names* must be the same sequence passed to _write_header() so
    that ud_itab_list matches enum ud_itab_index.
    """
    with open("itab.c", "w") as f:
        f.write("\n/* itab.c -- auto generated by opgen.py, do not edit. */\n"
                "\n#include \"types.h\"\n#include \"decode.h\"\n"
                "#include \"itab.h\"\n\n")

        #
        # generate mnemonic list
        #
        f.write("const char * ud_mnemonics_str[] = {\n")
        for m in mnm_list:
            f.write(" \"%s\",\n" % m)
        f.write("};\n\n")

        #
        # generate instruction tables
        #
        f.write("\n")
        for t in table_names:
            f.write("\nstatic struct ud_itab_entry " + t.lower()
                    + "[%d] = {\n" % table_sizes[t])
            for i in range(int(table_sizes[t])):
                index = "%02X" % i
                if index in tables[t]:
                    f.write(centry(index, tables[t][index]))
                else:
                    # unassigned slots decode as the invalid instruction
                    f.write(centry(index, {"type": "invalid"}))
            f.write("};\n")

        #
        # write the instruction table list
        #
        f.write("\n/* the order of this table matches enum ud_itab_index */")
        f.write("\nstruct ud_itab_entry * ud_itab_list[] = {\n")
        for name in table_names:
            f.write(" %s,\n" % name.lower())
        f.write("};\n")


def main(argv=None):
    """Generate itab.h and itab.c from the optable XML named in argv[1].

    *argv* defaults to sys.argv.  Exits with status -1 on a usage error or
    on any error in the table description.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        _fail("usage: %s <optable.xml>" % argv[0])

    # Start from empty tables so that repeated calls do not accumulate.
    tables.clear()
    table_sizes.clear()
    del mnm_list[:]

    _build_tables(_load_optable(argv[1]))

    table_names = sorted(tables)
    _write_header(table_names)
    _write_source(table_names)


if __name__ == "__main__":
    main()

# vim:expandtab
# vim:sw=4
# vim:ts=4