]> andersk Git - udis86.git/blame - libudis86/decode.c
more updates
[udis86.git] / libudis86 / decode.c
CommitLineData
bbe45369 1/* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26#ifndef __UD_STANDALONE__
27# include <string.h>
28#endif /* __UD_STANDALONE__ */
29
30#include "types.h"
31#include "itab.h"
32#include "input.h"
33#include "decode.h"
34
35/* The max number of prefixes to an instruction */
36#define MAX_PREFIXES 15
37
38static struct ud_itab_entry ie_invalid = { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
39static struct ud_itab_entry ie_pause = { UD_Ipause, O_NONE, O_NONE, O_NONE, P_none };
40static struct ud_itab_entry ie_nop = { UD_Inop, O_NONE, O_NONE, O_NONE, P_none };
41
42
43/* Looks up mnemonic code in the mnemonic string table
44 * Returns NULL if the mnemonic code is invalid
45 */
46const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
47{
48 if ( c < UD_Id3vil )
49 return ud_mnemonics_str[ c ];
50 return NULL;
51}
52
53
54/* Extracts instruction prefixes.
55 */
56static int get_prefixes( struct ud* u )
57{
58 unsigned int have_pfx = 1;
59 unsigned int i;
60 uint8_t curr;
61
62 /* if in error state, bail out */
63 if ( u->error )
64 return -1;
65
66 /* keep going as long as there are prefixes available */
67 for ( i = 0; have_pfx ; ++i ) {
68
69 /* Get next byte. */
70 inp_next(u);
71 if ( u->error )
72 return -1;
73 curr = inp_curr( u );
74
75 /* rex prefixes in 64bit mode */
76 if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
77 u->pfx_rex = curr;
78 } else {
79 switch ( curr )
80 {
81 case 0x2E :
82 u->pfx_seg = UD_R_CS;
83 u->pfx_rex = 0;
84 break;
85 case 0x36 :
86 u->pfx_seg = UD_R_SS;
87 u->pfx_rex = 0;
88 break;
89 case 0x3E :
90 u->pfx_seg = UD_R_DS;
91 u->pfx_rex = 0;
92 break;
93 case 0x26 :
94 u->pfx_seg = UD_R_ES;
95 u->pfx_rex = 0;
96 break;
97 case 0x64 :
98 u->pfx_seg = UD_R_FS;
99 u->pfx_rex = 0;
100 break;
101 case 0x65 :
102 u->pfx_seg = UD_R_GS;
103 u->pfx_rex = 0;
104 break;
105 case 0x67 : /* adress-size override prefix */
106 u->pfx_adr = 0x67;
107 u->pfx_rex = 0;
108 break;
109 case 0xF0 :
110 u->pfx_lock = 0xF0;
111 u->pfx_rex = 0;
112 break;
113 case 0x66:
114 /* the 0x66 sse prefix is only effective if no other sse prefix
115 * has already been specified.
116 */
117 if ( !u->pfx_insn ) u->pfx_insn = 0x66;
118 u->pfx_opr = 0x66;
119 u->pfx_rex = 0;
120 break;
121 case 0xF2:
122 u->pfx_insn = 0xF2;
123 u->pfx_repne = 0xF2;
124 u->pfx_rex = 0;
125 break;
126 case 0xF3:
127 u->pfx_insn = 0xF3;
128 u->pfx_rep = 0xF3;
129 u->pfx_repe = 0xF3;
130 u->pfx_rex = 0;
131 break;
132 default :
133 /* No more prefixes */
134 have_pfx = 0;
135 break;
136 }
137 }
138
139 /* check if we reached max instruction length */
140 if ( i + 1 == MAX_INSN_LENGTH ) {
141 u->error = 1;
142 break;
143 }
144 }
145
146 /* return status */
147 if ( u->error )
148 return -1;
149
150 /* rewind back one byte in stream, since the above loop
151 * stops with a non-prefix byte.
152 */
153 inp_back(u);
154
155 /* speculatively determine the effective operand mode,
156 * based on the prefixes and the current disassembly
157 * mode. This may be inaccurate, but useful for mode
158 * dependent decoding.
159 */
160 if ( u->dis_mode == 64 ) {
161 u->opr_mode = REX_W( u->pfx_rex ) ? 64 : ( ( u->pfx_opr ) ? 16 : 32 ) ;
162 u->adr_mode = ( u->pfx_adr ) ? 32 : 64;
163 } else if ( u->dis_mode == 32 ) {
164 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
165 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
166 } else if ( u->dis_mode == 16 ) {
167 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
168 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
169 }
170
171 return 0;
172}
173
174
175/* Searches the instruction tables for the right entry.
176 */
177static int search_itab( struct ud * u )
178{
179 struct ud_itab_entry * e = NULL;
180 enum ud_itab_index table;
181 uint8_t peek;
182 uint8_t did_peek = 0;
183 uint8_t curr;
184 uint8_t index;
185
186 /* if in state of error, return */
187 if ( u->error )
188 return -1;
189
190 /* get first byte of opcode. */
191 inp_next(u);
192 if ( u->error )
193 return -1;
194 curr = inp_curr(u);
195
196 /* resolve xchg, nop, pause crazyness */
197 if ( 0x90 == curr ) {
198 if ( !( u->dis_mode == 64 && REX_B( u->pfx_rex ) ) ) {
199 if ( u->pfx_rep ) {
200 u->pfx_rep = 0;
201 e = & ie_pause;
202 } else {
203 e = & ie_nop;
204 }
205 goto found_entry;
206 }
207 }
208
209 /* get top-level table */
210 if ( 0x0F == curr ) {
211 table = ITAB__0F;
212 curr = inp_next(u);
213 if ( u->error )
214 return -1;
215
216 /* 2byte opcodes can be modified by 0x66, F3, and F2 prefixes */
217 if ( 0x66 == u->pfx_insn ) {
218 if ( ud_itab_list[ ITAB__PFX_SSE66__0F ][ curr ].mnemonic != UD_Iinvalid ) {
219 table = ITAB__PFX_SSE66__0F;
220 u->pfx_opr = 0;
221 }
222 } else if ( 0xF2 == u->pfx_insn ) {
223 if ( ud_itab_list[ ITAB__PFX_SSEF2__0F ][ curr ].mnemonic != UD_Iinvalid ) {
224 table = ITAB__PFX_SSEF2__0F;
225 u->pfx_repne = 0;
226 }
227 } else if ( 0xF3 == u->pfx_insn ) {
228 if ( ud_itab_list[ ITAB__PFX_SSEF3__0F ][ curr ].mnemonic != UD_Iinvalid ) {
229 table = ITAB__PFX_SSEF3__0F;
230 u->pfx_repe = 0;
231 u->pfx_rep = 0;
232 }
233 }
234 /* pick an instruction from the 1byte table */
235 } else {
236 table = ITAB__1BYTE;
237 }
238
239 index = curr;
240
241search:
242
243 e = & ud_itab_list[ table ][ index ];
244
245 /* if mnemonic constant is a standard instruction constant
246 * our search is over.
247 */
248
249 if ( e->mnemonic < UD_Id3vil ) {
250 if ( e->mnemonic == UD_Iinvalid ) {
251 if ( did_peek ) {
252 inp_next( u ); if ( u->error ) return -1;
253 }
254 goto found_entry;
255 }
256 goto found_entry;
257 }
258
259 table = e->prefix;
260
261 switch ( e->mnemonic )
262 {
263 case UD_Igrp_reg:
264 peek = inp_peek( u );
265 did_peek = 1;
266 index = MODRM_REG( peek );
267 break;
268
269 case UD_Igrp_mod:
270 peek = inp_peek( u );
271 did_peek = 1;
272 index = MODRM_MOD( peek );
273 if ( index == 3 )
274 index = ITAB__MOD_INDX__11;
275 else
276 index = ITAB__MOD_INDX__NOT_11;
277 break;
278
279 case UD_Igrp_rm:
280 curr = inp_next( u );
281 did_peek = 0;
282 if ( u->error )
283 return -1;
284 index = MODRM_RM( curr );
285 break;
286
287 case UD_Igrp_x87:
288 curr = inp_next( u );
289 did_peek = 0;
290 if ( u->error )
291 return -1;
292 index = curr - 0xC0;
293 break;
294
295 case UD_Igrp_3byte:
296 curr = inp_next( u );
297 did_peek = 0;
298 if (u->error)
299 return -1;
300 index = curr;
301 break;
302
303 case UD_Igrp_osize:
304 if ( u->opr_mode == 64 )
305 index = ITAB__MODE_INDX__64;
306 else if ( u->opr_mode == 32 )
307 index = ITAB__MODE_INDX__32;
308 else
309 index = ITAB__MODE_INDX__16;
310 break;
311
312 case UD_Igrp_asize:
313 if ( u->adr_mode == 64 )
314 index = ITAB__MODE_INDX__64;
315 else if ( u->adr_mode == 32 )
316 index = ITAB__MODE_INDX__32;
317 else
318 index = ITAB__MODE_INDX__16;
319 break;
320
321 case UD_Igrp_mode:
322 if ( u->dis_mode == 64 )
323 index = ITAB__MODE_INDX__64;
324 else if ( u->dis_mode == 32 )
325 index = ITAB__MODE_INDX__32;
326 else
327 index = ITAB__MODE_INDX__16;
328 break;
329
330 case UD_Igrp_vendor:
331 if ( u->vendor == UD_VENDOR_INTEL )
332 index = ITAB__VENDOR_INDX__INTEL;
333 else if ( u->vendor == UD_VENDOR_AMD )
334 index = ITAB__VENDOR_INDX__AMD;
335 else if ( u->vendor == UD_VENDOR_ANY )
336 index = ITAB__VENDOR_INDX__ANY;
337 else
338 return -1;
339 break;
340
341 case UD_Id3vil:
342 return -1;
343 break;
344
345 default:
346 return -1;
347 break;
348 }
349
350 goto search;
351
352found_entry:
353
354 u->itab_entry = e;
355 u->mnemonic = u->itab_entry->mnemonic;
356
357 return 0;
358}
359
360
361static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
362{
363 switch ( s )
364 {
365 case SZ_V:
366 return ( u->opr_mode );
367 case SZ_Z:
368 return ( u->opr_mode == 16 ) ? 16 : 32;
369 case SZ_P:
370 return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
371 case SZ_MDQ:
372 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
373 case SZ_RDQ:
374 return ( u->dis_mode == 64 ) ? 64 : 32;
375 default:
376 return s;
377 }
378}
379
380
381static int resolve_mnemonic( struct ud* u )
382{
383 /* far/near flags */
384 u->br_far = 0;
385 u->br_near = 0;
386 /* readjust operand sizes for call/jmp instrcutions */
387 if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
388 /* WP: 16bit pointer */
389 if ( u->operand[ 0 ].size == SZ_WP ) {
390 u->operand[ 0 ].size = 16;
391 u->br_far = 1;
392 u->br_near= 0;
393 /* DP: 32bit pointer */
394 } else if ( u->operand[ 0 ].size == SZ_DP ) {
395 u->operand[ 0 ].size = 32;
396 u->br_far = 1;
397 u->br_near= 0;
398 } else {
399 u->br_far = 0;
400 u->br_near= 1;
401 }
402 /* resolve 3dnow weirdness. */
403 } else if ( u->mnemonic == UD_I3dnow ) {
404 u->mnemonic = ud_itab_list[ ITAB__3DNOW ][ inp_curr( u ) ].mnemonic;
405 }
406 /* SWAPGS is only valid in 64bits mode */
407 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
408 u->error = 1;
409 return -1;
410 }
411
412 return 0;
413}
414
415
416/* -----------------------------------------------------------------------------
417 * decode_a()- Decodes operands of the type seg:offset
418 * -----------------------------------------------------------------------------
419 */
420static void
421decode_a(struct ud* u, struct ud_operand *op)
422{
423 if (u->opr_mode == 16) {
424 /* seg16:off16 */
425 op->type = UD_OP_PTR;
426 op->size = 32;
427 op->lval.ptr.off = inp_uint16(u);
428 op->lval.ptr.seg = inp_uint16(u);
429 } else {
430 /* seg16:off32 */
431 op->type = UD_OP_PTR;
432 op->size = 48;
433 op->lval.ptr.off = inp_uint32(u);
434 op->lval.ptr.seg = inp_uint16(u);
435 }
436}
437
438/* -----------------------------------------------------------------------------
439 * decode_gpr() - Returns decoded General Purpose Register
440 * -----------------------------------------------------------------------------
441 */
442static enum ud_type
443decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
444{
445 s = resolve_operand_size(u, s);
446
447 switch (s) {
448 case 64:
449 return UD_R_RAX + rm;
450 case SZ_DP:
451 case 32:
452 return UD_R_EAX + rm;
453 case SZ_WP:
454 case 16:
455 return UD_R_AX + rm;
456 case 8:
457 if (u->dis_mode == 64 && u->pfx_rex) {
458 if (rm >= 4)
459 return UD_R_SPL + (rm-4);
460 return UD_R_AL + rm;
461 } else return UD_R_AL + rm;
462 default:
463 return 0;
464 }
465}
466
467/* -----------------------------------------------------------------------------
468 * resolve_gpr64() - 64bit General Purpose Register-Selection.
469 * -----------------------------------------------------------------------------
470 */
471static enum ud_type
472resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op)
473{
474 if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
475 gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
476 else gpr_op = (gpr_op - OP_rAX);
477
478 if (u->opr_mode == 16)
479 return gpr_op + UD_R_AX;
480 if (u->dis_mode == 32 ||
481 (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
482 return gpr_op + UD_R_EAX;
483 }
484
485 return gpr_op + UD_R_RAX;
486}
487
488/* -----------------------------------------------------------------------------
489 * resolve_gpr32 () - 32bit General Purpose Register-Selection.
490 * -----------------------------------------------------------------------------
491 */
492static enum ud_type
493resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
494{
495 gpr_op = gpr_op - OP_eAX;
496
497 if (u->opr_mode == 16)
498 return gpr_op + UD_R_AX;
499
500 return gpr_op + UD_R_EAX;
501}
502
503/* -----------------------------------------------------------------------------
504 * resolve_reg() - Resolves the register type
505 * -----------------------------------------------------------------------------
506 */
507static enum ud_type
508resolve_reg(struct ud* u, unsigned int type, unsigned char i)
509{
510 switch (type) {
511 case T_MMX : return UD_R_MM0 + (i & 7);
512 case T_XMM : return UD_R_XMM0 + i;
513 case T_CRG : return UD_R_CR0 + i;
514 case T_DBG : return UD_R_DR0 + i;
515 case T_SEG : return UD_R_ES + (i & 7);
516 case T_NONE:
517 default: return UD_NONE;
518 }
519}
520
521/* -----------------------------------------------------------------------------
522 * decode_imm() - Decodes Immediate values.
523 * -----------------------------------------------------------------------------
524 */
525static void
526decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
527{
528 op->size = resolve_operand_size(u, s);
529 op->type = UD_OP_IMM;
530
531 switch (op->size) {
532 case 8: op->lval.sbyte = inp_uint8(u); break;
533 case 16: op->lval.uword = inp_uint16(u); break;
534 case 32: op->lval.udword = inp_uint32(u); break;
535 case 64: op->lval.uqword = inp_uint64(u); break;
536 default: return;
537 }
538}
539
540/* -----------------------------------------------------------------------------
541 * decode_modrm() - Decodes ModRM Byte
542 * -----------------------------------------------------------------------------
543 */
544static void
545decode_modrm(struct ud* u, struct ud_operand *op, unsigned int s,
546 unsigned char rm_type, struct ud_operand *opreg,
547 unsigned char reg_size, unsigned char reg_type)
548{
549 unsigned char mod, rm, reg;
550
551 inp_next(u);
552
553 /* get mod, r/m and reg fields */
554 mod = MODRM_MOD(inp_curr(u));
555 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(inp_curr(u));
556 reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(inp_curr(u));
557
558 op->size = resolve_operand_size(u, s);
559
560 /* if mod is 11b, then the UD_R_m specifies a gpr/mmx/sse/control/debug */
561 if (mod == 3) {
562 op->type = UD_OP_REG;
563 if (rm_type == T_GPR)
564 op->base = decode_gpr(u, op->size, rm);
565 else op->base = resolve_reg(u, rm_type, (REX_B(u->pfx_rex) << 3) | (rm&7));
566 }
567 /* else its memory addressing */
568 else {
569 op->type = UD_OP_MEM;
570
571 /* 64bit addressing */
572 if (u->adr_mode == 64) {
573
574 op->base = UD_R_RAX + rm;
575
576 /* get offset type */
577 if (mod == 1)
578 op->offset = 8;
579 else if (mod == 2)
580 op->offset = 32;
581 else if (mod == 0 && (rm & 7) == 5) {
582 op->base = UD_R_RIP;
583 op->offset = 32;
584 } else op->offset = 0;
585
586 /* Scale-Index-Base (SIB) */
587 if ((rm & 7) == 4) {
588 inp_next(u);
589
590 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
591 op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
592 op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
593
594 /* special conditions for base reference */
595 if (op->index == UD_R_RSP) {
596 op->index = UD_NONE;
597 op->scale = UD_NONE;
598 }
599
600 if (op->base == UD_R_RBP || op->base == UD_R_R13) {
601 if (mod == 0)
602 op->base = UD_NONE;
603 if (mod == 1)
604 op->offset = 8;
605 else op->offset = 32;
606 }
607 }
608 }
609
610 /* 32-Bit addressing mode */
611 else if (u->adr_mode == 32) {
612
613 /* get base */
614 op->base = UD_R_EAX + rm;
615
616 /* get offset type */
617 if (mod == 1)
618 op->offset = 8;
619 else if (mod == 2)
620 op->offset = 32;
621 else if (mod == 0 && rm == 5) {
622 op->base = UD_NONE;
623 op->offset = 32;
624 } else op->offset = 0;
625
626 /* Scale-Index-Base (SIB) */
627 if ((rm & 7) == 4) {
628 inp_next(u);
629
630 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
631 op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
632 op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
633
634 if (op->index == UD_R_ESP) {
635 op->index = UD_NONE;
636 op->scale = UD_NONE;
637 }
638
639 /* special condition for base reference */
640 if (op->base == UD_R_EBP) {
641 if (mod == 0)
642 op->base = UD_NONE;
643 if (mod == 1)
644 op->offset = 8;
645 else op->offset = 32;
646 }
647 }
648 }
649
650 /* 16bit addressing mode */
651 else {
652 switch (rm) {
653 case 0: op->base = UD_R_BX; op->index = UD_R_SI; break;
654 case 1: op->base = UD_R_BX; op->index = UD_R_DI; break;
655 case 2: op->base = UD_R_BP; op->index = UD_R_SI; break;
656 case 3: op->base = UD_R_BP; op->index = UD_R_DI; break;
657 case 4: op->base = UD_R_SI; break;
658 case 5: op->base = UD_R_DI; break;
659 case 6: op->base = UD_R_BP; break;
660 case 7: op->base = UD_R_BX; break;
661 }
662
663 if (mod == 0 && rm == 6) {
664 op->offset= 16;
665 op->base = UD_NONE;
666 }
667 else if (mod == 1)
668 op->offset = 8;
669 else if (mod == 2)
670 op->offset = 16;
671 }
672 }
673
674 /* extract offset, if any */
675 switch(op->offset) {
676 case 8 : op->lval.ubyte = inp_uint8(u); break;
677 case 16: op->lval.uword = inp_uint16(u); break;
678 case 32: op->lval.udword = inp_uint32(u); break;
679 case 64: op->lval.uqword = inp_uint64(u); break;
680 default: break;
681 }
682
683 /* resolve register encoded in reg field */
684 if (opreg) {
685 opreg->type = UD_OP_REG;
686 opreg->size = resolve_operand_size(u, reg_size);
687 if (reg_type == T_GPR)
688 opreg->base = decode_gpr(u, opreg->size, reg);
689 else opreg->base = resolve_reg(u, reg_type, reg);
690 }
691}
692
693/* -----------------------------------------------------------------------------
694 * decode_o() - Decodes offset
695 * -----------------------------------------------------------------------------
696 */
697static void
698decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
699{
700 switch (u->adr_mode) {
701 case 64:
702 op->offset = 64;
703 op->lval.uqword = inp_uint64(u);
704 break;
705 case 32:
706 op->offset = 32;
707 op->lval.udword = inp_uint32(u);
708 break;
709 case 16:
710 op->offset = 16;
711 op->lval.uword = inp_uint16(u);
712 break;
713 default:
714 return;
715 }
716 op->type = UD_OP_MEM;
717 op->size = resolve_operand_size(u, s);
718}
719
720/* -----------------------------------------------------------------------------
721 * disasm_operands() - Disassembles Operands.
722 * -----------------------------------------------------------------------------
723 */
724static int disasm_operands(register struct ud* u)
725{
726
727
728 /* mopXt = map entry, operand X, type; */
729 enum ud_operand_code mop1t = u->itab_entry->operand1.type;
730 enum ud_operand_code mop2t = u->itab_entry->operand2.type;
731 enum ud_operand_code mop3t = u->itab_entry->operand3.type;
732
733 /* mopXs = map entry, operand X, size */
734 unsigned int mop1s = u->itab_entry->operand1.size;
735 unsigned int mop2s = u->itab_entry->operand2.size;
736 unsigned int mop3s = u->itab_entry->operand3.size;
737
738 /* iop = instruction operand */
739 register struct ud_operand* iop = u->operand;
740
741 switch(mop1t) {
742
743 case OP_A :
744 decode_a(u, &(iop[0]));
745 break;
746
747 /* M[b] ... */
748 case OP_M :
749 if (MODRM_MOD(inp_peek(u)) == 3)
750 u->error= 1;
751 /* E, G/P/V/I/CL/1/S */
752 case OP_E :
753 if (mop2t == OP_G) {
754 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_GPR);
755 if (mop3t == OP_I)
756 decode_imm(u, mop3s, &(iop[2]));
757 else if (mop3t == OP_CL) {
758 iop[2].type = UD_OP_REG;
759 iop[2].base = UD_R_CL;
760 iop[2].size = 8;
761 }
762 }
763 else if (mop2t == OP_P)
764 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_MMX);
765 else if (mop2t == OP_V)
766 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_XMM);
767 else if (mop2t == OP_S)
768 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_SEG);
769 else {
770 decode_modrm(u, &(iop[0]), mop1s, T_GPR, NULL, 0, T_NONE);
771 if (mop2t == OP_CL) {
772 iop[1].type = UD_OP_REG;
773 iop[1].base = UD_R_CL;
774 iop[1].size = 8;
775 } else if (mop2t == OP_I1) {
776 iop[1].type = UD_OP_CONST;
777 u->operand[1].lval.udword = 1;
778 } else if (mop2t == OP_I) {
779 decode_imm(u, mop2s, &(iop[1]));
780 }
781 }
782 break;
783
784 /* G, E/PR[,I]/VR */
785 case OP_G :
786 if (mop2t == OP_M) {
787 if (MODRM_MOD(inp_peek(u)) == 3)
788 u->error= 1;
789 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR);
790 } else if (mop2t == OP_E) {
791 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR);
792 if (mop3t == OP_I)
793 decode_imm(u, mop3s, &(iop[2]));
794 } else if (mop2t == OP_PR) {
795 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_GPR);
796 if (mop3t == OP_I)
797 decode_imm(u, mop3s, &(iop[2]));
798 } else if (mop2t == OP_VR) {
799 if (MODRM_MOD(inp_peek(u)) != 3)
800 u->error = 1;
801 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR);
802 } else if (mop2t == OP_W)
803 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR);
804 break;
805
806 /* AL..BH, I/O/DX */
807 case OP_AL : case OP_CL : case OP_DL : case OP_BL :
808 case OP_AH : case OP_CH : case OP_DH : case OP_BH :
809
810 iop[0].type = UD_OP_REG;
811 iop[0].base = UD_R_AL + (mop1t - OP_AL);
812 iop[0].size = 8;
813
814 if (mop2t == OP_I)
815 decode_imm(u, mop2s, &(iop[1]));
816 else if (mop2t == OP_DX) {
817 iop[1].type = UD_OP_REG;
818 iop[1].base = UD_R_DX;
819 iop[1].size = 16;
820 }
821 else if (mop2t == OP_O)
822 decode_o(u, mop2s, &(iop[1]));
823 break;
824
825 /* rAX[r8]..rDI[r15], I/rAX..rDI/O */
826 case OP_rAXr8 : case OP_rCXr9 : case OP_rDXr10 : case OP_rBXr11 :
827 case OP_rSPr12: case OP_rBPr13: case OP_rSIr14 : case OP_rDIr15 :
828 case OP_rAX : case OP_rCX : case OP_rDX : case OP_rBX :
829 case OP_rSP : case OP_rBP : case OP_rSI : case OP_rDI :
830
831 iop[0].type = UD_OP_REG;
832 iop[0].base = resolve_gpr64(u, mop1t);
833
834 if (mop2t == OP_I)
835 decode_imm(u, mop2s, &(iop[1]));
836 else if (mop2t >= OP_rAX && mop2t <= OP_rDI) {
837 iop[1].type = UD_OP_REG;
838 iop[1].base = resolve_gpr64(u, mop2t);
839 }
840 else if (mop2t == OP_O) {
841 decode_o(u, mop2s, &(iop[1]));
842 iop[0].size = resolve_operand_size(u, mop2s);
843 }
844 break;
845
846 /* AL[r8b]..BH[r15b], I */
847 case OP_ALr8b : case OP_CLr9b : case OP_DLr10b : case OP_BLr11b :
848 case OP_AHr12b: case OP_CHr13b: case OP_DHr14b : case OP_BHr15b :
849 {
850 ud_type_t gpr = (mop1t - OP_ALr8b) + UD_R_AL +
851 (REX_B(u->pfx_rex) << 3);
852 if (UD_R_AH <= gpr && u->pfx_rex)
853 gpr = gpr + 4;
854 iop[0].type = UD_OP_REG;
855 iop[0].base = gpr;
856 if (mop2t == OP_I)
857 decode_imm(u, mop2s, &(iop[1]));
858 break;
859 }
860
861 /* eAX..eDX, DX/I */
862 case OP_eAX : case OP_eCX : case OP_eDX : case OP_eBX :
863 case OP_eSP : case OP_eBP : case OP_eSI : case OP_eDI :
864 iop[0].type = UD_OP_REG;
865 iop[0].base = resolve_gpr32(u, mop1t);
866 if (mop2t == OP_DX) {
867 iop[1].type = UD_OP_REG;
868 iop[1].base = UD_R_DX;
869 iop[1].size = 16;
870 } else if (mop2t == OP_I)
871 decode_imm(u, mop2s, &(iop[1]));
872 break;
873
874 /* ES..GS */
875 case OP_ES : case OP_CS : case OP_DS :
876 case OP_SS : case OP_FS : case OP_GS :
877
878 /* in 64bits mode, only fs and gs are allowed */
879 if (u->dis_mode == 64)
880 if (mop1t != OP_FS && mop1t != OP_GS)
881 u->error= 1;
882 iop[0].type = UD_OP_REG;
883 iop[0].base = (mop1t - OP_ES) + UD_R_ES;
884 iop[0].size = 16;
885
886 break;
887
888 /* J */
889 case OP_J :
890 decode_imm(u, mop1s, &(iop[0]));
891 iop[0].type = UD_OP_JIMM;
892 break ;
893
894 /* PR, I */
895 case OP_PR:
896 if (MODRM_MOD(inp_peek(u)) != 3)
897 u->error = 1;
898 decode_modrm(u, &(iop[0]), mop1s, T_MMX, NULL, 0, T_NONE);
899 if (mop2t == OP_I)
900 decode_imm(u, mop2s, &(iop[1]));
901 break;
902
903 /* VR, I */
904 case OP_VR:
905 if (MODRM_MOD(inp_peek(u)) != 3)
906 u->error = 1;
907 decode_modrm(u, &(iop[0]), mop1s, T_XMM, NULL, 0, T_NONE);
908 if (mop2t == OP_I)
909 decode_imm(u, mop2s, &(iop[1]));
910 break;
911
912 /* P, Q[,I]/W/E[,I],VR */
913 case OP_P :
914 if (mop2t == OP_Q) {
915 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_MMX);
916 if (mop3t == OP_I)
917 decode_imm(u, mop3s, &(iop[2]));
918 } else if (mop2t == OP_W) {
919 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX);
920 } else if (mop2t == OP_VR) {
921 if (MODRM_MOD(inp_peek(u)) != 3)
922 u->error = 1;
923 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX);
924 } else if (mop2t == OP_E) {
925 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_MMX);
926 if (mop3t == OP_I)
927 decode_imm(u, mop3s, &(iop[2]));
928 }
929 break;
930
931 /* R, C/D */
932 case OP_R :
933 if (mop2t == OP_C)
934 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_CRG);
935 else if (mop2t == OP_D)
936 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_DBG);
937 break;
938
939 /* C, R */
940 case OP_C :
941 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_CRG);
942 break;
943
944 /* D, R */
945 case OP_D :
946 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_DBG);
947 break;
948
949 /* Q, P */
950 case OP_Q :
951 decode_modrm(u, &(iop[0]), mop1s, T_MMX, &(iop[1]), mop2s, T_MMX);
952 break;
953
954 /* S, E */
955 case OP_S :
956 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_SEG);
957 break;
958
959 /* W, V */
960 case OP_W :
961 decode_modrm(u, &(iop[0]), mop1s, T_XMM, &(iop[1]), mop2s, T_XMM);
962 break;
963
964 /* V, W[,I]/Q/M/E */
965 case OP_V :
966 if (mop2t == OP_W) {
967 /* special cases for movlps and movhps */
968 if (MODRM_MOD(inp_peek(u)) == 3) {
969 if (u->mnemonic == UD_Imovlps)
970 u->mnemonic = UD_Imovhlps;
971 else
972 if (u->mnemonic == UD_Imovhps)
973 u->mnemonic = UD_Imovlhps;
974 }
975 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_XMM);
976 if (mop3t == OP_I)
977 decode_imm(u, mop3s, &(iop[2]));
978 } else if (mop2t == OP_Q)
979 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM);
980 else if (mop2t == OP_M) {
981 if (MODRM_MOD(inp_peek(u)) == 3)
982 u->error= 1;
983 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM);
984 } else if (mop2t == OP_E) {
985 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM);
986 } else if (mop2t == OP_PR) {
987 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM);
988 }
989 break;
990
991 /* DX, eAX/AL */
992 case OP_DX :
993 iop[0].type = UD_OP_REG;
994 iop[0].base = UD_R_DX;
995 iop[0].size = 16;
996
997 if (mop2t == OP_eAX) {
998 iop[1].type = UD_OP_REG;
999 iop[1].base = resolve_gpr32(u, mop2t);
1000 } else if (mop2t == OP_AL) {
1001 iop[1].type = UD_OP_REG;
1002 iop[1].base = UD_R_AL;
1003 iop[1].size = 8;
1004 }
1005
1006 break;
1007
1008 /* I, I/AL/eAX */
1009 case OP_I :
1010 decode_imm(u, mop1s, &(iop[0]));
1011 if (mop2t == OP_I)
1012 decode_imm(u, mop2s, &(iop[1]));
1013 else if (mop2t == OP_AL) {
1014 iop[1].type = UD_OP_REG;
1015 iop[1].base = UD_R_AL;
1016 iop[1].size = 16;
1017 } else if (mop2t == OP_eAX) {
1018 iop[1].type = UD_OP_REG;
1019 iop[1].base = resolve_gpr32(u, mop2t);
1020 }
1021 break;
1022
1023 /* O, AL/eAX */
1024 case OP_O :
1025 decode_o(u, mop1s, &(iop[0]));
1026 iop[1].type = UD_OP_REG;
1027 iop[1].size = resolve_operand_size(u, mop1s);
1028 if (mop2t == OP_AL)
1029 iop[1].base = UD_R_AL;
1030 else if (mop2t == OP_eAX)
1031 iop[1].base = resolve_gpr32(u, mop2t);
1032 else if (mop2t == OP_rAX)
1033 iop[1].base = resolve_gpr64(u, mop2t);
1034 break;
1035
1036 /* 3 */
1037 case OP_I3 :
1038 iop[0].type = UD_OP_CONST;
1039 iop[0].lval.sbyte = 3;
1040 break;
1041
1042 /* ST(n), ST(n) */
1043 case OP_ST0 : case OP_ST1 : case OP_ST2 : case OP_ST3 :
1044 case OP_ST4 : case OP_ST5 : case OP_ST6 : case OP_ST7 :
1045
1046 iop[0].type = UD_OP_REG;
1047 iop[0].base = (mop1t-OP_ST0) + UD_R_ST0;
1048 iop[0].size = 0;
1049
1050 if (mop2t >= OP_ST0 && mop2t <= OP_ST7) {
1051 iop[1].type = UD_OP_REG;
1052 iop[1].base = (mop2t-OP_ST0) + UD_R_ST0;
1053 iop[1].size = 0;
1054 }
1055 break;
1056
1057 /* AX */
1058 case OP_AX:
1059 iop[0].type = UD_OP_REG;
1060 iop[0].base = UD_R_AX;
1061 iop[0].size = 16;
1062 break;
1063
1064 /* none */
1065 default :
1066 iop[0].type = iop[1].type = iop[2].type = UD_NONE;
1067 }
1068
1069 return 0;
1070}
1071
1072/* -----------------------------------------------------------------------------
1073 * clear_insn() - clear instruction pointer
1074 * -----------------------------------------------------------------------------
1075 */
1076static int clear_insn(register struct ud* u)
1077{
1078 u->error = 0;
1079 u->pfx_seg = 0;
1080 u->pfx_opr = 0;
1081 u->pfx_adr = 0;
1082 u->pfx_lock = 0;
1083 u->pfx_repne = 0;
1084 u->pfx_rep = 0;
1085 u->pfx_repe = 0;
1086 u->pfx_seg = 0;
1087 u->pfx_rex = 0;
1088 u->pfx_insn = 0;
1089 u->mnemonic = UD_Inone;
1090 u->itab_entry = NULL;
1091
1092 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
1093 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
1094 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
1095
1096 return 0;
1097}
1098
1099static int do_mode( struct ud* u )
1100{
1101 /* if in error state, bail out */
1102 if ( u->error ) return -1;
1103
1104 /* propagate perfix effects */
1105 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */
1106
1107 /* Check validity of instruction m64 */
1108 if ( P_INV64( u->itab_entry->prefix ) ) {
1109 u->error = 1;
1110 return -1;
1111 }
1112
1113 /* effective rex prefix is the effective mask for the
1114 * instruction hard-coded in the opcode map.
1115 */
1116 u->pfx_rex = ( u->pfx_rex & 0x40 ) |
1117 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
1118
1119 /* whether this instruction has a default operand size of
1120 * 64bit, also hardcoded into the opcode map.
1121 */
1122 u->default64 = P_DEF64( u->itab_entry->prefix );
1123 /* calculate effective operand size */
1124 if ( REX_W( u->pfx_rex ) ) {
1125 u->opr_mode = 64;
1126 } else if ( u->pfx_opr ) {
1127 u->opr_mode = 16;
1128 } else {
1129 /* unless the default opr size of instruction is 64,
1130 * the effective operand size in the absence of rex.w
1131 * prefix is 32.
1132 */
1133 u->opr_mode = ( u->default64 ) ? 64 : 32;
1134 }
1135
1136 /* calculate effective address size */
1137 u->adr_mode = (u->pfx_adr) ? 32 : 64;
1138 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
1139 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
1140 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
1141 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
1142 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
1143 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
1144 }
1145
1146 /* These flags determine which operand to apply the operand size
1147 * cast to.
1148 */
1149 u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
1150 u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
1151 u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
1152
1153 /* set flags for implicit addressing */
1154 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
1155
1156 return 0;
1157}
1158
1159static int gen_hex( struct ud *u )
1160{
1161 unsigned int i;
1162 unsigned char *src_ptr = inp_sess( u );
1163 char* src_hex;
1164
1165 /* bail out if in error stat. */
1166 if ( u->error ) return -1;
1167 /* output buffer pointe */
1168 src_hex = ( char* ) u->insn_hexcode;
1169 /* for each byte used to decode instruction */
1170 for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
1171 sprintf( src_hex, "%02x", *src_ptr & 0xFF );
1172 src_hex += 2;
1173 }
1174 return 0;
1175}
1176
1177/* =============================================================================
1178 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1179 * =============================================================================
1180 */
1181unsigned int ud_decode( struct ud* u )
1182{
1183 inp_start(u);
1184
1185 if ( clear_insn( u ) ) {
1186 ; /* error */
1187 } else if ( get_prefixes( u ) != 0 ) {
1188 ; /* error */
1189 } else if ( search_itab( u ) != 0 ) {
1190 ; /* error */
1191 } else if ( do_mode( u ) != 0 ) {
1192 ; /* error */
1193 } else if ( disasm_operands( u ) != 0 ) {
1194 ; /* error */
1195 } else if ( resolve_mnemonic( u ) != 0 ) {
1196 ; /* error */
1197 }
1198
1199 /* Handle decode error. */
1200 if ( u->error ) {
1201 /* clear out the decode data. */
1202 clear_insn( u );
1203 /* mark the sequence of bytes as invalid. */
1204 u->itab_entry = & ie_invalid;
1205 u->mnemonic = u->itab_entry->mnemonic;
1206 }
1207
1208 u->insn_offset = u->pc; /* set offset of instruction */
1209 u->insn_fill = 0; /* set translation buffer index to 0 */
1210 u->pc += u->inp_ctr; /* move program counter by bytes decoded */
1211 gen_hex( u ); /* generate hex code */
1212
1213 /* return number of bytes disassembled. */
1214 return u->inp_ctr;
1215}
1216
1217/* vim:cindent
1218 * vim:ts=4
1219 * vim:sw=4
1220 * vim:expandtab
1221 */
This page took 0.199614 seconds and 5 git commands to generate.