]> andersk Git - udis86.git/blame - libudis86/decode.c
Wait to include system headers until we know they are wanted.
[udis86.git] / libudis86 / decode.c
CommitLineData
bbe45369 1/* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
bbe45369 26
27#include "types.h"
28#include "itab.h"
29#include "input.h"
30#include "decode.h"
31
cc971d96
AK
32#ifndef __UD_STANDALONE__
33# include <string.h>
34#endif /* __UD_STANDALONE__ */
35
bbe45369 36/* The max number of prefixes to an instruction */
37#define MAX_PREFIXES 15
38
39static struct ud_itab_entry ie_invalid = { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
40static struct ud_itab_entry ie_pause = { UD_Ipause, O_NONE, O_NONE, O_NONE, P_none };
41static struct ud_itab_entry ie_nop = { UD_Inop, O_NONE, O_NONE, O_NONE, P_none };
42
43
44/* Looks up mnemonic code in the mnemonic string table
45 * Returns NULL if the mnemonic code is invalid
46 */
47const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
48{
49 if ( c < UD_Id3vil )
50 return ud_mnemonics_str[ c ];
51 return NULL;
52}
53
54
55/* Extracts instruction prefixes.
56 */
57static int get_prefixes( struct ud* u )
58{
59 unsigned int have_pfx = 1;
60 unsigned int i;
61 uint8_t curr;
62
63 /* if in error state, bail out */
64 if ( u->error )
65 return -1;
66
67 /* keep going as long as there are prefixes available */
68 for ( i = 0; have_pfx ; ++i ) {
69
70 /* Get next byte. */
71 inp_next(u);
72 if ( u->error )
73 return -1;
74 curr = inp_curr( u );
75
76 /* rex prefixes in 64bit mode */
77 if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
78 u->pfx_rex = curr;
79 } else {
80 switch ( curr )
81 {
82 case 0x2E :
83 u->pfx_seg = UD_R_CS;
84 u->pfx_rex = 0;
85 break;
86 case 0x36 :
87 u->pfx_seg = UD_R_SS;
88 u->pfx_rex = 0;
89 break;
90 case 0x3E :
91 u->pfx_seg = UD_R_DS;
92 u->pfx_rex = 0;
93 break;
94 case 0x26 :
95 u->pfx_seg = UD_R_ES;
96 u->pfx_rex = 0;
97 break;
98 case 0x64 :
99 u->pfx_seg = UD_R_FS;
100 u->pfx_rex = 0;
101 break;
102 case 0x65 :
103 u->pfx_seg = UD_R_GS;
104 u->pfx_rex = 0;
105 break;
106 case 0x67 : /* adress-size override prefix */
107 u->pfx_adr = 0x67;
108 u->pfx_rex = 0;
109 break;
110 case 0xF0 :
111 u->pfx_lock = 0xF0;
112 u->pfx_rex = 0;
113 break;
114 case 0x66:
115 /* the 0x66 sse prefix is only effective if no other sse prefix
116 * has already been specified.
117 */
118 if ( !u->pfx_insn ) u->pfx_insn = 0x66;
119 u->pfx_opr = 0x66;
120 u->pfx_rex = 0;
121 break;
122 case 0xF2:
123 u->pfx_insn = 0xF2;
124 u->pfx_repne = 0xF2;
125 u->pfx_rex = 0;
126 break;
127 case 0xF3:
128 u->pfx_insn = 0xF3;
129 u->pfx_rep = 0xF3;
130 u->pfx_repe = 0xF3;
131 u->pfx_rex = 0;
132 break;
133 default :
134 /* No more prefixes */
135 have_pfx = 0;
136 break;
137 }
138 }
139
140 /* check if we reached max instruction length */
141 if ( i + 1 == MAX_INSN_LENGTH ) {
142 u->error = 1;
143 break;
144 }
145 }
146
147 /* return status */
148 if ( u->error )
149 return -1;
150
151 /* rewind back one byte in stream, since the above loop
152 * stops with a non-prefix byte.
153 */
154 inp_back(u);
155
156 /* speculatively determine the effective operand mode,
157 * based on the prefixes and the current disassembly
158 * mode. This may be inaccurate, but useful for mode
159 * dependent decoding.
160 */
161 if ( u->dis_mode == 64 ) {
162 u->opr_mode = REX_W( u->pfx_rex ) ? 64 : ( ( u->pfx_opr ) ? 16 : 32 ) ;
163 u->adr_mode = ( u->pfx_adr ) ? 32 : 64;
164 } else if ( u->dis_mode == 32 ) {
165 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
166 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
167 } else if ( u->dis_mode == 16 ) {
168 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
169 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
170 }
171
172 return 0;
173}
174
175
176/* Searches the instruction tables for the right entry.
177 */
178static int search_itab( struct ud * u )
179{
180 struct ud_itab_entry * e = NULL;
181 enum ud_itab_index table;
182 uint8_t peek;
183 uint8_t did_peek = 0;
184 uint8_t curr;
185 uint8_t index;
186
187 /* if in state of error, return */
188 if ( u->error )
189 return -1;
190
191 /* get first byte of opcode. */
192 inp_next(u);
193 if ( u->error )
194 return -1;
195 curr = inp_curr(u);
196
197 /* resolve xchg, nop, pause crazyness */
198 if ( 0x90 == curr ) {
199 if ( !( u->dis_mode == 64 && REX_B( u->pfx_rex ) ) ) {
200 if ( u->pfx_rep ) {
201 u->pfx_rep = 0;
202 e = & ie_pause;
203 } else {
204 e = & ie_nop;
205 }
206 goto found_entry;
207 }
208 }
209
210 /* get top-level table */
211 if ( 0x0F == curr ) {
212 table = ITAB__0F;
213 curr = inp_next(u);
214 if ( u->error )
215 return -1;
216
217 /* 2byte opcodes can be modified by 0x66, F3, and F2 prefixes */
218 if ( 0x66 == u->pfx_insn ) {
219 if ( ud_itab_list[ ITAB__PFX_SSE66__0F ][ curr ].mnemonic != UD_Iinvalid ) {
220 table = ITAB__PFX_SSE66__0F;
221 u->pfx_opr = 0;
222 }
223 } else if ( 0xF2 == u->pfx_insn ) {
224 if ( ud_itab_list[ ITAB__PFX_SSEF2__0F ][ curr ].mnemonic != UD_Iinvalid ) {
225 table = ITAB__PFX_SSEF2__0F;
226 u->pfx_repne = 0;
227 }
228 } else if ( 0xF3 == u->pfx_insn ) {
229 if ( ud_itab_list[ ITAB__PFX_SSEF3__0F ][ curr ].mnemonic != UD_Iinvalid ) {
230 table = ITAB__PFX_SSEF3__0F;
231 u->pfx_repe = 0;
232 u->pfx_rep = 0;
233 }
234 }
235 /* pick an instruction from the 1byte table */
236 } else {
237 table = ITAB__1BYTE;
238 }
239
240 index = curr;
241
242search:
243
244 e = & ud_itab_list[ table ][ index ];
245
246 /* if mnemonic constant is a standard instruction constant
247 * our search is over.
248 */
249
250 if ( e->mnemonic < UD_Id3vil ) {
251 if ( e->mnemonic == UD_Iinvalid ) {
252 if ( did_peek ) {
253 inp_next( u ); if ( u->error ) return -1;
254 }
255 goto found_entry;
256 }
257 goto found_entry;
258 }
259
260 table = e->prefix;
261
262 switch ( e->mnemonic )
263 {
264 case UD_Igrp_reg:
265 peek = inp_peek( u );
266 did_peek = 1;
267 index = MODRM_REG( peek );
268 break;
269
270 case UD_Igrp_mod:
271 peek = inp_peek( u );
272 did_peek = 1;
273 index = MODRM_MOD( peek );
274 if ( index == 3 )
275 index = ITAB__MOD_INDX__11;
276 else
277 index = ITAB__MOD_INDX__NOT_11;
278 break;
279
280 case UD_Igrp_rm:
281 curr = inp_next( u );
282 did_peek = 0;
283 if ( u->error )
284 return -1;
285 index = MODRM_RM( curr );
286 break;
287
288 case UD_Igrp_x87:
289 curr = inp_next( u );
290 did_peek = 0;
291 if ( u->error )
292 return -1;
293 index = curr - 0xC0;
294 break;
295
296 case UD_Igrp_3byte:
297 curr = inp_next( u );
298 did_peek = 0;
299 if (u->error)
300 return -1;
301 index = curr;
302 break;
303
304 case UD_Igrp_osize:
305 if ( u->opr_mode == 64 )
306 index = ITAB__MODE_INDX__64;
307 else if ( u->opr_mode == 32 )
308 index = ITAB__MODE_INDX__32;
309 else
310 index = ITAB__MODE_INDX__16;
311 break;
312
313 case UD_Igrp_asize:
314 if ( u->adr_mode == 64 )
315 index = ITAB__MODE_INDX__64;
316 else if ( u->adr_mode == 32 )
317 index = ITAB__MODE_INDX__32;
318 else
319 index = ITAB__MODE_INDX__16;
320 break;
321
322 case UD_Igrp_mode:
323 if ( u->dis_mode == 64 )
324 index = ITAB__MODE_INDX__64;
325 else if ( u->dis_mode == 32 )
326 index = ITAB__MODE_INDX__32;
327 else
328 index = ITAB__MODE_INDX__16;
329 break;
330
331 case UD_Igrp_vendor:
332 if ( u->vendor == UD_VENDOR_INTEL )
333 index = ITAB__VENDOR_INDX__INTEL;
334 else if ( u->vendor == UD_VENDOR_AMD )
335 index = ITAB__VENDOR_INDX__AMD;
336 else if ( u->vendor == UD_VENDOR_ANY )
337 index = ITAB__VENDOR_INDX__ANY;
338 else
339 return -1;
340 break;
341
342 case UD_Id3vil:
343 return -1;
344 break;
345
346 default:
347 return -1;
348 break;
349 }
350
351 goto search;
352
353found_entry:
354
355 u->itab_entry = e;
356 u->mnemonic = u->itab_entry->mnemonic;
357
358 return 0;
359}
360
361
362static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
363{
364 switch ( s )
365 {
366 case SZ_V:
367 return ( u->opr_mode );
368 case SZ_Z:
369 return ( u->opr_mode == 16 ) ? 16 : 32;
370 case SZ_P:
371 return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
372 case SZ_MDQ:
373 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
374 case SZ_RDQ:
375 return ( u->dis_mode == 64 ) ? 64 : 32;
376 default:
377 return s;
378 }
379}
380
381
382static int resolve_mnemonic( struct ud* u )
383{
384 /* far/near flags */
385 u->br_far = 0;
386 u->br_near = 0;
387 /* readjust operand sizes for call/jmp instrcutions */
388 if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
389 /* WP: 16bit pointer */
390 if ( u->operand[ 0 ].size == SZ_WP ) {
391 u->operand[ 0 ].size = 16;
392 u->br_far = 1;
393 u->br_near= 0;
394 /* DP: 32bit pointer */
395 } else if ( u->operand[ 0 ].size == SZ_DP ) {
396 u->operand[ 0 ].size = 32;
397 u->br_far = 1;
398 u->br_near= 0;
399 } else {
400 u->br_far = 0;
401 u->br_near= 1;
402 }
403 /* resolve 3dnow weirdness. */
404 } else if ( u->mnemonic == UD_I3dnow ) {
405 u->mnemonic = ud_itab_list[ ITAB__3DNOW ][ inp_curr( u ) ].mnemonic;
406 }
407 /* SWAPGS is only valid in 64bits mode */
408 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
409 u->error = 1;
410 return -1;
411 }
412
413 return 0;
414}
415
416
417/* -----------------------------------------------------------------------------
418 * decode_a()- Decodes operands of the type seg:offset
419 * -----------------------------------------------------------------------------
420 */
421static void
422decode_a(struct ud* u, struct ud_operand *op)
423{
424 if (u->opr_mode == 16) {
425 /* seg16:off16 */
426 op->type = UD_OP_PTR;
427 op->size = 32;
428 op->lval.ptr.off = inp_uint16(u);
429 op->lval.ptr.seg = inp_uint16(u);
430 } else {
431 /* seg16:off32 */
432 op->type = UD_OP_PTR;
433 op->size = 48;
434 op->lval.ptr.off = inp_uint32(u);
435 op->lval.ptr.seg = inp_uint16(u);
436 }
437}
438
439/* -----------------------------------------------------------------------------
440 * decode_gpr() - Returns decoded General Purpose Register
441 * -----------------------------------------------------------------------------
442 */
443static enum ud_type
444decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
445{
446 s = resolve_operand_size(u, s);
447
448 switch (s) {
449 case 64:
450 return UD_R_RAX + rm;
451 case SZ_DP:
452 case 32:
453 return UD_R_EAX + rm;
454 case SZ_WP:
455 case 16:
456 return UD_R_AX + rm;
457 case 8:
458 if (u->dis_mode == 64 && u->pfx_rex) {
459 if (rm >= 4)
460 return UD_R_SPL + (rm-4);
461 return UD_R_AL + rm;
462 } else return UD_R_AL + rm;
463 default:
464 return 0;
465 }
466}
467
468/* -----------------------------------------------------------------------------
469 * resolve_gpr64() - 64bit General Purpose Register-Selection.
470 * -----------------------------------------------------------------------------
471 */
472static enum ud_type
473resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op)
474{
475 if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
476 gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
477 else gpr_op = (gpr_op - OP_rAX);
478
479 if (u->opr_mode == 16)
480 return gpr_op + UD_R_AX;
481 if (u->dis_mode == 32 ||
482 (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
483 return gpr_op + UD_R_EAX;
484 }
485
486 return gpr_op + UD_R_RAX;
487}
488
489/* -----------------------------------------------------------------------------
490 * resolve_gpr32 () - 32bit General Purpose Register-Selection.
491 * -----------------------------------------------------------------------------
492 */
493static enum ud_type
494resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
495{
496 gpr_op = gpr_op - OP_eAX;
497
498 if (u->opr_mode == 16)
499 return gpr_op + UD_R_AX;
500
501 return gpr_op + UD_R_EAX;
502}
503
504/* -----------------------------------------------------------------------------
505 * resolve_reg() - Resolves the register type
506 * -----------------------------------------------------------------------------
507 */
508static enum ud_type
509resolve_reg(struct ud* u, unsigned int type, unsigned char i)
510{
511 switch (type) {
512 case T_MMX : return UD_R_MM0 + (i & 7);
513 case T_XMM : return UD_R_XMM0 + i;
514 case T_CRG : return UD_R_CR0 + i;
515 case T_DBG : return UD_R_DR0 + i;
516 case T_SEG : return UD_R_ES + (i & 7);
517 case T_NONE:
518 default: return UD_NONE;
519 }
520}
521
522/* -----------------------------------------------------------------------------
523 * decode_imm() - Decodes Immediate values.
524 * -----------------------------------------------------------------------------
525 */
526static void
527decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
528{
529 op->size = resolve_operand_size(u, s);
530 op->type = UD_OP_IMM;
531
532 switch (op->size) {
533 case 8: op->lval.sbyte = inp_uint8(u); break;
534 case 16: op->lval.uword = inp_uint16(u); break;
535 case 32: op->lval.udword = inp_uint32(u); break;
536 case 64: op->lval.uqword = inp_uint64(u); break;
537 default: return;
538 }
539}
540
541/* -----------------------------------------------------------------------------
542 * decode_modrm() - Decodes ModRM Byte
543 * -----------------------------------------------------------------------------
544 */
545static void
546decode_modrm(struct ud* u, struct ud_operand *op, unsigned int s,
547 unsigned char rm_type, struct ud_operand *opreg,
548 unsigned char reg_size, unsigned char reg_type)
549{
550 unsigned char mod, rm, reg;
551
552 inp_next(u);
553
554 /* get mod, r/m and reg fields */
555 mod = MODRM_MOD(inp_curr(u));
556 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(inp_curr(u));
557 reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(inp_curr(u));
558
559 op->size = resolve_operand_size(u, s);
560
561 /* if mod is 11b, then the UD_R_m specifies a gpr/mmx/sse/control/debug */
562 if (mod == 3) {
563 op->type = UD_OP_REG;
564 if (rm_type == T_GPR)
565 op->base = decode_gpr(u, op->size, rm);
566 else op->base = resolve_reg(u, rm_type, (REX_B(u->pfx_rex) << 3) | (rm&7));
567 }
568 /* else its memory addressing */
569 else {
570 op->type = UD_OP_MEM;
571
572 /* 64bit addressing */
573 if (u->adr_mode == 64) {
574
575 op->base = UD_R_RAX + rm;
576
577 /* get offset type */
578 if (mod == 1)
579 op->offset = 8;
580 else if (mod == 2)
581 op->offset = 32;
582 else if (mod == 0 && (rm & 7) == 5) {
583 op->base = UD_R_RIP;
584 op->offset = 32;
585 } else op->offset = 0;
586
587 /* Scale-Index-Base (SIB) */
588 if ((rm & 7) == 4) {
589 inp_next(u);
590
591 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
592 op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
593 op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
594
595 /* special conditions for base reference */
596 if (op->index == UD_R_RSP) {
597 op->index = UD_NONE;
598 op->scale = UD_NONE;
599 }
600
601 if (op->base == UD_R_RBP || op->base == UD_R_R13) {
602 if (mod == 0)
603 op->base = UD_NONE;
604 if (mod == 1)
605 op->offset = 8;
606 else op->offset = 32;
607 }
608 }
609 }
610
611 /* 32-Bit addressing mode */
612 else if (u->adr_mode == 32) {
613
614 /* get base */
615 op->base = UD_R_EAX + rm;
616
617 /* get offset type */
618 if (mod == 1)
619 op->offset = 8;
620 else if (mod == 2)
621 op->offset = 32;
622 else if (mod == 0 && rm == 5) {
623 op->base = UD_NONE;
624 op->offset = 32;
625 } else op->offset = 0;
626
627 /* Scale-Index-Base (SIB) */
628 if ((rm & 7) == 4) {
629 inp_next(u);
630
631 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
632 op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
633 op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
634
635 if (op->index == UD_R_ESP) {
636 op->index = UD_NONE;
637 op->scale = UD_NONE;
638 }
639
640 /* special condition for base reference */
641 if (op->base == UD_R_EBP) {
642 if (mod == 0)
643 op->base = UD_NONE;
644 if (mod == 1)
645 op->offset = 8;
646 else op->offset = 32;
647 }
648 }
649 }
650
651 /* 16bit addressing mode */
652 else {
653 switch (rm) {
654 case 0: op->base = UD_R_BX; op->index = UD_R_SI; break;
655 case 1: op->base = UD_R_BX; op->index = UD_R_DI; break;
656 case 2: op->base = UD_R_BP; op->index = UD_R_SI; break;
657 case 3: op->base = UD_R_BP; op->index = UD_R_DI; break;
658 case 4: op->base = UD_R_SI; break;
659 case 5: op->base = UD_R_DI; break;
660 case 6: op->base = UD_R_BP; break;
661 case 7: op->base = UD_R_BX; break;
662 }
663
664 if (mod == 0 && rm == 6) {
665 op->offset= 16;
666 op->base = UD_NONE;
667 }
668 else if (mod == 1)
669 op->offset = 8;
670 else if (mod == 2)
671 op->offset = 16;
672 }
673 }
674
675 /* extract offset, if any */
676 switch(op->offset) {
677 case 8 : op->lval.ubyte = inp_uint8(u); break;
678 case 16: op->lval.uword = inp_uint16(u); break;
679 case 32: op->lval.udword = inp_uint32(u); break;
680 case 64: op->lval.uqword = inp_uint64(u); break;
681 default: break;
682 }
683
684 /* resolve register encoded in reg field */
685 if (opreg) {
686 opreg->type = UD_OP_REG;
687 opreg->size = resolve_operand_size(u, reg_size);
688 if (reg_type == T_GPR)
689 opreg->base = decode_gpr(u, opreg->size, reg);
690 else opreg->base = resolve_reg(u, reg_type, reg);
691 }
692}
693
694/* -----------------------------------------------------------------------------
695 * decode_o() - Decodes offset
696 * -----------------------------------------------------------------------------
697 */
698static void
699decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
700{
701 switch (u->adr_mode) {
702 case 64:
703 op->offset = 64;
704 op->lval.uqword = inp_uint64(u);
705 break;
706 case 32:
707 op->offset = 32;
708 op->lval.udword = inp_uint32(u);
709 break;
710 case 16:
711 op->offset = 16;
712 op->lval.uword = inp_uint16(u);
713 break;
714 default:
715 return;
716 }
717 op->type = UD_OP_MEM;
718 op->size = resolve_operand_size(u, s);
719}
720
721/* -----------------------------------------------------------------------------
722 * disasm_operands() - Disassembles Operands.
723 * -----------------------------------------------------------------------------
724 */
725static int disasm_operands(register struct ud* u)
726{
727
728
729 /* mopXt = map entry, operand X, type; */
730 enum ud_operand_code mop1t = u->itab_entry->operand1.type;
731 enum ud_operand_code mop2t = u->itab_entry->operand2.type;
732 enum ud_operand_code mop3t = u->itab_entry->operand3.type;
733
734 /* mopXs = map entry, operand X, size */
735 unsigned int mop1s = u->itab_entry->operand1.size;
736 unsigned int mop2s = u->itab_entry->operand2.size;
737 unsigned int mop3s = u->itab_entry->operand3.size;
738
739 /* iop = instruction operand */
740 register struct ud_operand* iop = u->operand;
741
742 switch(mop1t) {
743
744 case OP_A :
745 decode_a(u, &(iop[0]));
746 break;
747
748 /* M[b] ... */
749 case OP_M :
750 if (MODRM_MOD(inp_peek(u)) == 3)
751 u->error= 1;
752 /* E, G/P/V/I/CL/1/S */
753 case OP_E :
754 if (mop2t == OP_G) {
755 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_GPR);
756 if (mop3t == OP_I)
757 decode_imm(u, mop3s, &(iop[2]));
758 else if (mop3t == OP_CL) {
759 iop[2].type = UD_OP_REG;
760 iop[2].base = UD_R_CL;
761 iop[2].size = 8;
762 }
763 }
764 else if (mop2t == OP_P)
765 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_MMX);
766 else if (mop2t == OP_V)
767 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_XMM);
768 else if (mop2t == OP_S)
769 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_SEG);
770 else {
771 decode_modrm(u, &(iop[0]), mop1s, T_GPR, NULL, 0, T_NONE);
772 if (mop2t == OP_CL) {
773 iop[1].type = UD_OP_REG;
774 iop[1].base = UD_R_CL;
775 iop[1].size = 8;
776 } else if (mop2t == OP_I1) {
777 iop[1].type = UD_OP_CONST;
778 u->operand[1].lval.udword = 1;
779 } else if (mop2t == OP_I) {
780 decode_imm(u, mop2s, &(iop[1]));
781 }
782 }
783 break;
784
785 /* G, E/PR[,I]/VR */
786 case OP_G :
787 if (mop2t == OP_M) {
788 if (MODRM_MOD(inp_peek(u)) == 3)
789 u->error= 1;
790 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR);
791 } else if (mop2t == OP_E) {
792 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR);
793 if (mop3t == OP_I)
794 decode_imm(u, mop3s, &(iop[2]));
795 } else if (mop2t == OP_PR) {
796 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_GPR);
797 if (mop3t == OP_I)
798 decode_imm(u, mop3s, &(iop[2]));
799 } else if (mop2t == OP_VR) {
800 if (MODRM_MOD(inp_peek(u)) != 3)
801 u->error = 1;
802 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR);
803 } else if (mop2t == OP_W)
804 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR);
805 break;
806
807 /* AL..BH, I/O/DX */
808 case OP_AL : case OP_CL : case OP_DL : case OP_BL :
809 case OP_AH : case OP_CH : case OP_DH : case OP_BH :
810
811 iop[0].type = UD_OP_REG;
812 iop[0].base = UD_R_AL + (mop1t - OP_AL);
813 iop[0].size = 8;
814
815 if (mop2t == OP_I)
816 decode_imm(u, mop2s, &(iop[1]));
817 else if (mop2t == OP_DX) {
818 iop[1].type = UD_OP_REG;
819 iop[1].base = UD_R_DX;
820 iop[1].size = 16;
821 }
822 else if (mop2t == OP_O)
823 decode_o(u, mop2s, &(iop[1]));
824 break;
825
826 /* rAX[r8]..rDI[r15], I/rAX..rDI/O */
827 case OP_rAXr8 : case OP_rCXr9 : case OP_rDXr10 : case OP_rBXr11 :
828 case OP_rSPr12: case OP_rBPr13: case OP_rSIr14 : case OP_rDIr15 :
829 case OP_rAX : case OP_rCX : case OP_rDX : case OP_rBX :
830 case OP_rSP : case OP_rBP : case OP_rSI : case OP_rDI :
831
832 iop[0].type = UD_OP_REG;
833 iop[0].base = resolve_gpr64(u, mop1t);
834
835 if (mop2t == OP_I)
836 decode_imm(u, mop2s, &(iop[1]));
837 else if (mop2t >= OP_rAX && mop2t <= OP_rDI) {
838 iop[1].type = UD_OP_REG;
839 iop[1].base = resolve_gpr64(u, mop2t);
840 }
841 else if (mop2t == OP_O) {
842 decode_o(u, mop2s, &(iop[1]));
843 iop[0].size = resolve_operand_size(u, mop2s);
844 }
845 break;
846
847 /* AL[r8b]..BH[r15b], I */
848 case OP_ALr8b : case OP_CLr9b : case OP_DLr10b : case OP_BLr11b :
849 case OP_AHr12b: case OP_CHr13b: case OP_DHr14b : case OP_BHr15b :
850 {
851 ud_type_t gpr = (mop1t - OP_ALr8b) + UD_R_AL +
852 (REX_B(u->pfx_rex) << 3);
853 if (UD_R_AH <= gpr && u->pfx_rex)
854 gpr = gpr + 4;
855 iop[0].type = UD_OP_REG;
856 iop[0].base = gpr;
857 if (mop2t == OP_I)
858 decode_imm(u, mop2s, &(iop[1]));
859 break;
860 }
861
862 /* eAX..eDX, DX/I */
863 case OP_eAX : case OP_eCX : case OP_eDX : case OP_eBX :
864 case OP_eSP : case OP_eBP : case OP_eSI : case OP_eDI :
865 iop[0].type = UD_OP_REG;
866 iop[0].base = resolve_gpr32(u, mop1t);
867 if (mop2t == OP_DX) {
868 iop[1].type = UD_OP_REG;
869 iop[1].base = UD_R_DX;
870 iop[1].size = 16;
871 } else if (mop2t == OP_I)
872 decode_imm(u, mop2s, &(iop[1]));
873 break;
874
875 /* ES..GS */
876 case OP_ES : case OP_CS : case OP_DS :
877 case OP_SS : case OP_FS : case OP_GS :
878
879 /* in 64bits mode, only fs and gs are allowed */
880 if (u->dis_mode == 64)
881 if (mop1t != OP_FS && mop1t != OP_GS)
882 u->error= 1;
883 iop[0].type = UD_OP_REG;
884 iop[0].base = (mop1t - OP_ES) + UD_R_ES;
885 iop[0].size = 16;
886
887 break;
888
889 /* J */
890 case OP_J :
891 decode_imm(u, mop1s, &(iop[0]));
892 iop[0].type = UD_OP_JIMM;
893 break ;
894
895 /* PR, I */
896 case OP_PR:
897 if (MODRM_MOD(inp_peek(u)) != 3)
898 u->error = 1;
899 decode_modrm(u, &(iop[0]), mop1s, T_MMX, NULL, 0, T_NONE);
900 if (mop2t == OP_I)
901 decode_imm(u, mop2s, &(iop[1]));
902 break;
903
904 /* VR, I */
905 case OP_VR:
906 if (MODRM_MOD(inp_peek(u)) != 3)
907 u->error = 1;
908 decode_modrm(u, &(iop[0]), mop1s, T_XMM, NULL, 0, T_NONE);
909 if (mop2t == OP_I)
910 decode_imm(u, mop2s, &(iop[1]));
911 break;
912
913 /* P, Q[,I]/W/E[,I],VR */
914 case OP_P :
915 if (mop2t == OP_Q) {
916 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_MMX);
917 if (mop3t == OP_I)
918 decode_imm(u, mop3s, &(iop[2]));
919 } else if (mop2t == OP_W) {
920 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX);
921 } else if (mop2t == OP_VR) {
922 if (MODRM_MOD(inp_peek(u)) != 3)
923 u->error = 1;
924 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX);
925 } else if (mop2t == OP_E) {
926 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_MMX);
927 if (mop3t == OP_I)
928 decode_imm(u, mop3s, &(iop[2]));
929 }
930 break;
931
932 /* R, C/D */
933 case OP_R :
934 if (mop2t == OP_C)
935 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_CRG);
936 else if (mop2t == OP_D)
937 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_DBG);
938 break;
939
940 /* C, R */
941 case OP_C :
942 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_CRG);
943 break;
944
945 /* D, R */
946 case OP_D :
947 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_DBG);
948 break;
949
950 /* Q, P */
951 case OP_Q :
952 decode_modrm(u, &(iop[0]), mop1s, T_MMX, &(iop[1]), mop2s, T_MMX);
953 break;
954
955 /* S, E */
956 case OP_S :
957 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_SEG);
958 break;
959
960 /* W, V */
961 case OP_W :
962 decode_modrm(u, &(iop[0]), mop1s, T_XMM, &(iop[1]), mop2s, T_XMM);
963 break;
964
965 /* V, W[,I]/Q/M/E */
966 case OP_V :
967 if (mop2t == OP_W) {
968 /* special cases for movlps and movhps */
969 if (MODRM_MOD(inp_peek(u)) == 3) {
970 if (u->mnemonic == UD_Imovlps)
971 u->mnemonic = UD_Imovhlps;
972 else
973 if (u->mnemonic == UD_Imovhps)
974 u->mnemonic = UD_Imovlhps;
975 }
976 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_XMM);
977 if (mop3t == OP_I)
978 decode_imm(u, mop3s, &(iop[2]));
979 } else if (mop2t == OP_Q)
980 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM);
981 else if (mop2t == OP_M) {
982 if (MODRM_MOD(inp_peek(u)) == 3)
983 u->error= 1;
984 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM);
985 } else if (mop2t == OP_E) {
986 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM);
987 } else if (mop2t == OP_PR) {
988 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM);
989 }
990 break;
991
992 /* DX, eAX/AL */
993 case OP_DX :
994 iop[0].type = UD_OP_REG;
995 iop[0].base = UD_R_DX;
996 iop[0].size = 16;
997
998 if (mop2t == OP_eAX) {
999 iop[1].type = UD_OP_REG;
1000 iop[1].base = resolve_gpr32(u, mop2t);
1001 } else if (mop2t == OP_AL) {
1002 iop[1].type = UD_OP_REG;
1003 iop[1].base = UD_R_AL;
1004 iop[1].size = 8;
1005 }
1006
1007 break;
1008
1009 /* I, I/AL/eAX */
1010 case OP_I :
1011 decode_imm(u, mop1s, &(iop[0]));
1012 if (mop2t == OP_I)
1013 decode_imm(u, mop2s, &(iop[1]));
1014 else if (mop2t == OP_AL) {
1015 iop[1].type = UD_OP_REG;
1016 iop[1].base = UD_R_AL;
1017 iop[1].size = 16;
1018 } else if (mop2t == OP_eAX) {
1019 iop[1].type = UD_OP_REG;
1020 iop[1].base = resolve_gpr32(u, mop2t);
1021 }
1022 break;
1023
1024 /* O, AL/eAX */
1025 case OP_O :
1026 decode_o(u, mop1s, &(iop[0]));
1027 iop[1].type = UD_OP_REG;
1028 iop[1].size = resolve_operand_size(u, mop1s);
1029 if (mop2t == OP_AL)
1030 iop[1].base = UD_R_AL;
1031 else if (mop2t == OP_eAX)
1032 iop[1].base = resolve_gpr32(u, mop2t);
1033 else if (mop2t == OP_rAX)
1034 iop[1].base = resolve_gpr64(u, mop2t);
1035 break;
1036
1037 /* 3 */
1038 case OP_I3 :
1039 iop[0].type = UD_OP_CONST;
1040 iop[0].lval.sbyte = 3;
1041 break;
1042
1043 /* ST(n), ST(n) */
1044 case OP_ST0 : case OP_ST1 : case OP_ST2 : case OP_ST3 :
1045 case OP_ST4 : case OP_ST5 : case OP_ST6 : case OP_ST7 :
1046
1047 iop[0].type = UD_OP_REG;
1048 iop[0].base = (mop1t-OP_ST0) + UD_R_ST0;
1049 iop[0].size = 0;
1050
1051 if (mop2t >= OP_ST0 && mop2t <= OP_ST7) {
1052 iop[1].type = UD_OP_REG;
1053 iop[1].base = (mop2t-OP_ST0) + UD_R_ST0;
1054 iop[1].size = 0;
1055 }
1056 break;
1057
1058 /* AX */
1059 case OP_AX:
1060 iop[0].type = UD_OP_REG;
1061 iop[0].base = UD_R_AX;
1062 iop[0].size = 16;
1063 break;
1064
1065 /* none */
1066 default :
1067 iop[0].type = iop[1].type = iop[2].type = UD_NONE;
1068 }
1069
1070 return 0;
1071}
1072
1073/* -----------------------------------------------------------------------------
1074 * clear_insn() - clear instruction pointer
1075 * -----------------------------------------------------------------------------
1076 */
1077static int clear_insn(register struct ud* u)
1078{
1079 u->error = 0;
1080 u->pfx_seg = 0;
1081 u->pfx_opr = 0;
1082 u->pfx_adr = 0;
1083 u->pfx_lock = 0;
1084 u->pfx_repne = 0;
1085 u->pfx_rep = 0;
1086 u->pfx_repe = 0;
1087 u->pfx_seg = 0;
1088 u->pfx_rex = 0;
1089 u->pfx_insn = 0;
1090 u->mnemonic = UD_Inone;
1091 u->itab_entry = NULL;
1092
1093 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
1094 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
1095 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
1096
1097 return 0;
1098}
1099
1100static int do_mode( struct ud* u )
1101{
1102 /* if in error state, bail out */
1103 if ( u->error ) return -1;
1104
1105 /* propagate perfix effects */
1106 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */
1107
1108 /* Check validity of instruction m64 */
1109 if ( P_INV64( u->itab_entry->prefix ) ) {
1110 u->error = 1;
1111 return -1;
1112 }
1113
1114 /* effective rex prefix is the effective mask for the
1115 * instruction hard-coded in the opcode map.
1116 */
1117 u->pfx_rex = ( u->pfx_rex & 0x40 ) |
1118 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
1119
1120 /* whether this instruction has a default operand size of
1121 * 64bit, also hardcoded into the opcode map.
1122 */
1123 u->default64 = P_DEF64( u->itab_entry->prefix );
1124 /* calculate effective operand size */
1125 if ( REX_W( u->pfx_rex ) ) {
1126 u->opr_mode = 64;
1127 } else if ( u->pfx_opr ) {
1128 u->opr_mode = 16;
1129 } else {
1130 /* unless the default opr size of instruction is 64,
1131 * the effective operand size in the absence of rex.w
1132 * prefix is 32.
1133 */
1134 u->opr_mode = ( u->default64 ) ? 64 : 32;
1135 }
1136
1137 /* calculate effective address size */
1138 u->adr_mode = (u->pfx_adr) ? 32 : 64;
1139 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
1140 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
1141 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
1142 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
1143 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
1144 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
1145 }
1146
1147 /* These flags determine which operand to apply the operand size
1148 * cast to.
1149 */
1150 u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
1151 u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
1152 u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
1153
1154 /* set flags for implicit addressing */
1155 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
1156
1157 return 0;
1158}
1159
1160static int gen_hex( struct ud *u )
1161{
1162 unsigned int i;
1163 unsigned char *src_ptr = inp_sess( u );
1164 char* src_hex;
1165
1166 /* bail out if in error stat. */
1167 if ( u->error ) return -1;
1168 /* output buffer pointe */
1169 src_hex = ( char* ) u->insn_hexcode;
1170 /* for each byte used to decode instruction */
1171 for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
1172 sprintf( src_hex, "%02x", *src_ptr & 0xFF );
1173 src_hex += 2;
1174 }
1175 return 0;
1176}
1177
1178/* =============================================================================
1179 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1180 * =============================================================================
1181 */
1182unsigned int ud_decode( struct ud* u )
1183{
1184 inp_start(u);
1185
1186 if ( clear_insn( u ) ) {
1187 ; /* error */
1188 } else if ( get_prefixes( u ) != 0 ) {
1189 ; /* error */
1190 } else if ( search_itab( u ) != 0 ) {
1191 ; /* error */
1192 } else if ( do_mode( u ) != 0 ) {
1193 ; /* error */
1194 } else if ( disasm_operands( u ) != 0 ) {
1195 ; /* error */
1196 } else if ( resolve_mnemonic( u ) != 0 ) {
1197 ; /* error */
1198 }
1199
1200 /* Handle decode error. */
1201 if ( u->error ) {
1202 /* clear out the decode data. */
1203 clear_insn( u );
1204 /* mark the sequence of bytes as invalid. */
1205 u->itab_entry = & ie_invalid;
1206 u->mnemonic = u->itab_entry->mnemonic;
1207 }
1208
1209 u->insn_offset = u->pc; /* set offset of instruction */
1210 u->insn_fill = 0; /* set translation buffer index to 0 */
1211 u->pc += u->inp_ctr; /* move program counter by bytes decoded */
1212 gen_hex( u ); /* generate hex code */
1213
1214 /* return number of bytes disassembled. */
1215 return u->inp_ctr;
1216}
1217
1218/* vim:cindent
1219 * vim:ts=4
1220 * vim:sw=4
1221 * vim:expandtab
1222 */
This page took 0.214345 seconds and 5 git commands to generate.