x86_disasm(3) - disassemble a bytestream to x86 assembly language instructions

Other Alias

x86_disasm_forward, x86_disasm_range

SYNOPSIS

#include <libdis.h>

/*
 * DISASM_CALLBACK: function handed a disassembled instruction together
 * with a caller-supplied pointer (see raw_print in EXAMPLES).
 */
typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void *arg );

/*
 * DISASM_RESOLVER: function that maps an operand of an instruction to the
 * program address it refers to (see resolver in EXAMPLES).  The third
 * argument is a caller-supplied pointer.
 */
typedef long (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t *insn, void *arg );

/*
 * Disassemble a single instruction.
 *
 *   buf      buffer of bytes to disassemble
 *   buf_len  length of buffer
 *   buf_rva  load address of start of buffer
 *   offset   position in buffer
 *   insn     receives the disassembled instruction
 *
 * The code in EXAMPLES treats a non-zero return value as success.
 */
int x86_disasm( unsigned char *buf, unsigned int buf_len,
                unsigned long buf_rva, unsigned int offset,
                x86_insn_t *insn );

/*
 * Disassemble a range of a buffer, handing instructions to a callback.
 *
 *   buf      buffer of bytes to disassemble
 *   buf_rva  load address of start of buffer
 *   offset   position in buffer
 *   len      number of bytes to cover (EXAMPLES passes the buffer length)
 *   func     DISASM_CALLBACK applied to the disassembled instructions
 *   arg      pointer supplied alongside func (EXAMPLES passes NULL)
 */
int x86_disasm_range( unsigned char *buf, unsigned long buf_rva,
                      unsigned int offset, unsigned int len,
                      DISASM_CALLBACK func, void *arg );

/*
 * Disassemble forward in 'buf' starting at 'offset'.
 *
 *   buf       buffer of bytes to disassemble
 *   buf_len   length of buffer
 *   buf_rva   load address of start of buffer
 *   offset    position in buffer
 *   func      DISASM_CALLBACK applied to the disassembled instructions
 *   arg       pointer supplied alongside func
 *   resolver  DISASM_RESOLVER used to turn jump/call operands into
 *             program addresses
 *   r_arg     pointer supplied alongside resolver
 */
int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
                        unsigned long buf_rva, unsigned int offset,
                        DISASM_CALLBACK func, void *arg,
                        DISASM_RESOLVER resolver, void *r_arg );

DESCRIPTION

/* Size limits. */
#define MAX_REGNAME             8
#define MAX_PREFIX_STR          32
#define MAX_MNEM_STR            16
#define MAX_INSN_SIZE           20      /* same as in i386.h */

/* Largest possible operand, per output form. */
#define MAX_OP_STRING           32      /* string form */
#define MAX_OP_RAW_STRING       64      /* raw form */
#define MAX_OP_XML_STRING       256     /* xml form */
#define MAX_NUM_OPERANDS        8       /* max # implicit and explicit operands */

/* Whole-instruction limits: each is 2 * 8 * the matching MAX_OP_* value. */
#define MAX_INSN_STRING         512     /* 2 * 8 * MAX_OP_STRING */
#define MAX_INSN_RAW_STRING     1024    /* 2 * 8 * MAX_OP_RAW_STRING */
#define MAX_INSN_XML_STRING     4096    /* 2 * 8 * MAX_OP_XML_STRING */

/*
 * Roles a register can play.  Every enumerator is a distinct single bit,
 * so several of them may be ORed together.
 */
enum x86_reg_type {
        reg_gen     = 1 << 0,   /* general purpose */
        reg_in      = 1 << 1,   /* incoming args, ala RISC */
        reg_out     = 1 << 2,   /* args to calls, ala RISC */
        reg_local   = 1 << 3,   /* local vars, ala RISC */
        reg_fpu     = 1 << 4,   /* FPU data register */
        reg_seg     = 1 << 5,   /* segment register */
        reg_simd    = 1 << 6,   /* SIMD/MMX reg */
        reg_sys     = 1 << 7,   /* restricted/system register */
        reg_sp      = 1 << 8,   /* stack pointer */
        reg_fp      = 1 << 9,   /* frame pointer */
        reg_pc      = 1 << 10,  /* program counter */
        reg_retaddr = 1 << 11,  /* return addr for func */
        reg_cond    = 1 << 12,  /* condition code / flags */
        reg_zero    = 1 << 13,  /* zero register, ala RISC */
        reg_ret     = 1 << 14,  /* return value */
        /* bit 15 (0x08000) is not assigned */
        reg_src     = 1 << 16,  /* array/rep source */
        reg_dest    = 1 << 17,  /* array/rep destination */
        reg_count   = 1 << 18   /* array/rep/loop counter */
};

/* Description of one CPU register. */
typedef struct {
        /* register name, at most MAX_REGNAME chars of storage */
        char name[MAX_REGNAME];
        /* what register is used for */
        enum x86_reg_type type;
        /* size of register in bytes */
        unsigned int size;
        /* register ID #, for quick compares */
        unsigned int id;
        /* ID of reg this is an alias of */
        unsigned int alias;
        /* amount to shift aliased reg by */
        unsigned int shift;
} x86_reg_t;

/* Effective-address expression: scale / index / base / displacement. */
typedef struct {
        /* scale factor */
        unsigned int scale;
        /* index register */
        x86_reg_t index;
        /* base register */
        x86_reg_t base;
        /* displacement */
        long disp;
        /* 1 if the displacement is negative, 0 otherwise */
        char disp_sign;
        /* displacement size: 0, 1, 2 or 4 */
        char disp_size;
} x86_ea_t;

/* Kind of operand; the values are mutually exclusive. */
enum x86_op_type {
        op_unused        = 0,   /* empty/unused operand */
        op_register      = 1,   /* CPU register */
        op_immediate     = 2,   /* Immediate Value */
        op_relative_near = 3,   /* Relative offset from IP */
        op_relative_far  = 4,   /* (no description given in this page) */
        op_absolute      = 5,   /* Absolute address (ptr16:32) */
        op_expression    = 6,   /* Address expression (scale/index/base/disp) */
        op_offset        = 7,   /* Offset from start of segment (m32) */
        op_unknown       = 8    /* next value after op_offset */
};

/*
 * Data type (and therefore size) of an operand.
 *
 * NOTE(review): two pairs of enumerators in this listing share a value
 * (op_simd/op_ssimd = 10 and op_dsimd/op_fpuenv = 11).  Duplicate values
 * are legal C, but the members of each pair cannot be told apart by value;
 * confirm the intended numbering against libdis.h.
 */
enum x86_op_datatype {         /* these use Intel's lame terminology */

       op_byte = 1,            /* 1 byte integer */

       op_word = 2,            /* 2 byte integer */

       op_dword = 3,           /* 4 byte integer */

       op_qword = 4,           /* 8 byte integer */

       op_dqword = 5,          /* 16 byte integer */

       op_sreal = 6,           /* 4 byte real (single real) */

       op_dreal = 7,           /* 8 byte real (double real) */

       op_extreal = 8,         /* 10 byte real (extended real) */

       op_bcd = 9,             /* 10 byte binary-coded decimal */

       op_simd = 10,           /* 16 byte packed (SIMD, MMX) */


        /* NOTE(review): same value (10) as op_simd above */
        op_ssimd = 10,          /* 16 byte : 4 packed single FP (SIMD, MMX) */

        op_dsimd = 11,          /* 16 byte : 2 packed double FP (SIMD, MMX) */

        op_sssimd = 12,         /* 4 byte : scalar single FP (SIMD, MMX) */

        op_sdsimd = 13,         /* 8 byte : scalar double FP (SIMD, MMX) */
       op_descr32 = 14,        /* 6 byte Intel descriptor 2:4 */

       op_descr16 = 15,        /* 4 byte Intel descriptor 2:2 */

       op_pdescr32 = 16,       /* 6 byte Intel pseudo-descriptor 32:16 */

       op_pdescr16 = 17,       /* 6 byte Intel pseudo-descriptor 8:24:16 */

       /* NOTE(review): same value (11) as op_dsimd above */
       op_fpuenv = 11          /* 28 byte FPU control/environment data */

};

/* How an operand is accessed; single-bit values that are ORed together. */
enum x86_op_access {
        op_read    = 1 << 0,    /* R */
        op_write   = 1 << 1,    /* W */
        op_execute = 1 << 2     /* X */
};

/*
 * Miscellaneous operand flags.  The low values are independent bits that
 * are ORed together; the *_seg values (0x100 - 0x600) are mutually
 * exclusive with one another.
 */
enum x86_op_flags {
        /* ORable bits */
        op_signed   = 0x001,    /* signed integer */
        op_string   = 0x002,    /* possible string or array */
        op_constant = 0x004,    /* symbolic constant */
        op_pointer  = 0x008,    /* operand points to a memory address */
        op_sysref   = 0x010,    /* operand is a syscall number */
        op_implied  = 0x020,    /* operand is implicit in insn */
        op_hardcode = 0x040,    /* operand is hardcoded in insn */

        /* segment overrides: at most one of these */
        op_es_seg   = 0x100,    /* ES segment override */
        op_cs_seg   = 0x200,    /* CS segment override */
        op_ss_seg   = 0x300,    /* SS segment override */
        op_ds_seg   = 0x400,    /* DS segment override */
        op_fs_seg   = 0x500,    /* FS segment override */
        op_gs_seg   = 0x600     /* GS segment override */
};

typedef struct {
       enum x86_op_type        type;           /* operand type */

       enum x86_op_datatype    datatype;       /* operand size */

       enum x86_op_access      access;         /* operand access [RWX] */

       enum x86_op_flags       flags;          /* misc flags */

       union {

               /* immediate values */

               char            sbyte;

               short           sword;

               long            sdword;

               qword           sqword;

               unsigned char   byte;

               unsigned short  word;

               unsigned long   dword;

               qword                   qword;

               float    sreal;  

               double   dreal;  

               /* misc large/non-native types */

               unsigned char   extreal[10];

               unsigned char   bcd[10];

               qword           dqword[2];

               unsigned char   simd[16];

               unsigned char   fpuenv[28];

               /* absolute address */

               void            * address;

               /* offset from segment */

               unsigned long   offset;

               /* ID of CPU register */

               x86_reg_t       reg;

               /* offsets from current insn */

               char            relative_near;

               long            relative_far;

               /* effective address [expression] */

               x86_ea_t        expression;

       } data;

       void * insn;

} x86_op_t;

/* Node of a singly linked list of operands. */
typedef struct x86_operand_list {
        x86_op_t                 op;    /* the operand stored in this node */
        struct x86_operand_list *next;  /* link to another node of the same type */
} x86_oplist_t;

/*
 * Broad instruction categories.  Values 11 and 12 are not assigned in
 * this listing.
 */
enum x86_insn_group {
        insn_none        = 0,
        insn_controlflow = 1,
        insn_arithmetic  = 2,
        insn_logic       = 3,
        insn_stack       = 4,
        insn_comparison  = 5,
        insn_move        = 6,
        insn_string      = 7,
        insn_bit_manip   = 8,
        insn_flag_manip  = 9,
        insn_fpu         = 10,
        insn_interrupt   = 13,
        insn_system      = 14,
        insn_other       = 15
};

/*
 * Specific instruction types, listed by group.  In every non-zero value
 * the top hex digit (bits 12-15) equals the x86_insn_group value of the
 * group named in the section comment, e.g. 0x1xxx for insn_controlflow (1)
 * and 0xAxxx for insn_fpu (10).
 */
enum x86_insn_type {
        insn_invalid      = 0,

        /* insn_controlflow */
        insn_jmp          = 0x1001,
        insn_jcc          = 0x1002,
        insn_call         = 0x1003,
        insn_callcc       = 0x1004,
        insn_return       = 0x1005,
        insn_loop         = 0x1006,

        /* insn_arithmetic */
        insn_add          = 0x2001,
        insn_sub          = 0x2002,
        insn_mul          = 0x2003,
        insn_div          = 0x2004,
        insn_inc          = 0x2005,
        insn_dec          = 0x2006,
        insn_shl          = 0x2007,
        insn_shr          = 0x2008,
        insn_rol          = 0x2009,
        insn_ror          = 0x200A,

        /* insn_logic */
        insn_and          = 0x3001,
        insn_or           = 0x3002,
        insn_xor          = 0x3003,
        insn_not          = 0x3004,
        insn_neg          = 0x3005,

        /* insn_stack */
        insn_push         = 0x4001,
        insn_pop          = 0x4002,
        insn_pushregs     = 0x4003,
        insn_popregs      = 0x4004,
        insn_pushflags    = 0x4005,
        insn_popflags     = 0x4006,
        insn_enter        = 0x4007,
        insn_leave        = 0x4008,

        /* insn_comparison */
        insn_test         = 0x5001,
        insn_cmp          = 0x5002,

        /* insn_move */
        insn_mov          = 0x6001,     /* move */
        insn_movcc        = 0x6002,     /* conditional move */
        insn_xchg         = 0x6003,     /* exchange */
        insn_xchgcc       = 0x6004,     /* conditional exchange */

        /* insn_string */
        insn_strcmp       = 0x7001,
        insn_strload      = 0x7002,
        insn_strmov       = 0x7003,
        insn_strstore     = 0x7004,
        insn_translate    = 0x7005,     /* xlat */

        /* insn_bit_manip */
        insn_bittest      = 0x8001,
        insn_bitset       = 0x8002,
        insn_bitclear     = 0x8003,

        /* insn_flag_manip */
        insn_clear_carry  = 0x9001,
        insn_clear_zero   = 0x9002,
        insn_clear_oflow  = 0x9003,
        insn_clear_dir    = 0x9004,
        insn_clear_sign   = 0x9005,
        insn_clear_parity = 0x9006,
        insn_set_carry    = 0x9007,
        insn_set_zero     = 0x9008,
        insn_set_oflow    = 0x9009,
        insn_set_dir      = 0x900A,
        insn_set_sign     = 0x900B,
        insn_set_parity   = 0x900C,
        insn_tog_carry    = 0x9010,
        insn_tog_zero     = 0x9020,
        insn_tog_oflow    = 0x9030,
        insn_tog_dir      = 0x9040,
        insn_tog_sign     = 0x9050,
        insn_tog_parity   = 0x9060,

        /* insn_fpu */
        insn_fmov         = 0xA001,
        insn_fmovcc       = 0xA002,
        insn_fneg         = 0xA003,
        insn_fabs         = 0xA004,
        insn_fadd         = 0xA005,
        insn_fsub         = 0xA006,
        insn_fmul         = 0xA007,
        insn_fdiv         = 0xA008,
        insn_fsqrt        = 0xA009,
        insn_fcmp         = 0xA00A,
        insn_fcos         = 0xA00C,
        insn_fldpi        = 0xA00D,
        insn_fldz         = 0xA00E,
        insn_ftan         = 0xA00F,
        insn_fsine        = 0xA010,
        insn_fsys         = 0xA020,

        /* insn_interrupt */
        insn_int          = 0xD001,
        insn_intcc        = 0xD002,     /* not present in x86 ISA */
        insn_iret         = 0xD003,
        insn_bound        = 0xD004,
        insn_debug        = 0xD005,
        insn_trace        = 0xD006,
        insn_invalid_op   = 0xD007,
        insn_oflow        = 0xD008,

        /* insn_system */
        insn_halt         = 0xE001,
        insn_in           = 0xE002,     /* input from port/bus */
        insn_out          = 0xE003,     /* output to port/bus */
        insn_cpuid        = 0xE004,

        /* insn_other */
        insn_nop          = 0xF001,
        insn_bcdconv      = 0xF002,     /* convert to or from BCD */
        insn_szconv       = 0xF003      /* change size of operand */
};

/* Notes attached to an instruction; each value is a distinct single bit. */
enum x86_insn_note {
        insn_note_ring0  = 1 << 0,      /* Only available in ring 0 */
        insn_note_smm    = 1 << 1,      /* Only available in System Management Mode */
        insn_note_serial = 1 << 2       /* Serializing instruction */
};

/*
 * Flag conditions, one bit each: the "set" conditions occupy bits 0-7 and
 * the "clear"/comparison conditions bits 8-15.
 */
enum x86_flag_status {
        insn_carry_set                 = 1 << 0,
        insn_zero_set                  = 1 << 1,
        insn_oflow_set                 = 1 << 2,
        insn_dir_set                   = 1 << 3,
        insn_sign_set                  = 1 << 4,
        insn_parity_set                = 1 << 5,
        insn_carry_or_zero_set         = 1 << 6,
        insn_zero_set_or_sign_ne_oflow = 1 << 7,
        insn_carry_clear               = 1 << 8,
        insn_zero_clear                = 1 << 9,
        insn_oflow_clear               = 1 << 10,
        insn_dir_clear                 = 1 << 11,
        insn_sign_clear                = 1 << 12,
        insn_parity_clear              = 1 << 13,
        insn_sign_eq_oflow             = 1 << 14,
        insn_sign_ne_oflow             = 1 << 15
};

/*
 * CPU models.  Intel models are numbered 1-10; the AMD models use
 * 16, 32 and 48.
 */
enum x86_insn_cpu {
        /* Intel */
        cpu_8086        = 1,
        cpu_80286       = 2,
        cpu_80386       = 3,
        cpu_80387       = 4,
        cpu_80486       = 5,
        cpu_pentium     = 6,
        cpu_pentiumpro  = 7,
        cpu_pentium2    = 8,
        cpu_pentium3    = 9,
        cpu_pentium4    = 10,

        /* AMD */
        cpu_k6          = 16,
        cpu_k7          = 32,
        cpu_athlon      = 48
};

/* Instruction-set subsets, numbered consecutively from 1. */
enum x86_insn_isa {
        isa_gp     = 1,         /* general purpose */
        isa_fp     = 2,         /* floating point */
        isa_fpumgt = 3,         /* FPU/SIMD management */
        isa_mmx    = 4,         /* Intel MMX */
        isa_sse1   = 5,         /* Intel SSE SIMD */
        isa_sse2   = 6,         /* Intel SSE2 SIMD */
        isa_sse3   = 7,         /* Intel SSE3 SIMD */
        isa_3dnow  = 8,         /* AMD 3DNow! SIMD */
        isa_sys    = 9          /* system instructions */
};

/* Instruction prefixes: zero for none, otherwise one distinct bit each. */
enum x86_insn_prefix {
        insn_no_prefix   = 0,
        insn_rep_zero    = 1 << 0,
        insn_rep_notzero = 1 << 1,
        insn_lock        = 1 << 2,
        insn_delay       = 1 << 3
};

typedef struct {
       /* information about the instruction */

       unsigned long addr;             /* load address */

       unsigned long offset;           /* offset into file/buffer */

       enum x86_insn_group group;      /* meta-type, e.g. INSN_EXEC */

       enum x86_insn_type type;        /* type, e.g. INSN_BRANCH */

       unsigned char bytes[MAX_INSN_SIZE];

       unsigned char size;             /* size of insn in bytes */

       enum x86_insn_prefix prefix;

       enum x86_flag_status flags_set; /* flags set or tested by insn */

       enum x86_flag_status flags_tested;

       /* the instruction proper */

       char prefix_string[32];         /* prefixes [might be truncated] */

       char mnemonic[8];

       x86_op_t operands[3];

       /* convenience fields for user */

       void *block;                    /* code block containing this insn */

       void *function;                 /* function containing this insn */

       void *tag;                      /* tag the insn as seen/processed */

} x86_insn_t;
/*
 * Wildcard byte value.
 * NOTE(review): presumably the placeholder stored in x86_invariant_t.bytes
 * for the variable parts of an instruction -- confirm against libdis.h.
 */
#define X86_WILDCARD_BYTE 0xF4

typedef struct {

        enum x86_op_type        type;           /* operand type */

        enum x86_op_datatype    datatype;       /* operand size */

        enum x86_op_access      access;         /* operand access [RWX] */

        enum x86_op_flags       flags;          /* misc flags */
} x86_invariant_op_t;

typedef struct {
       unsigned char bytes[64];        /* invariant representation */

       unsigned int size;             /* number of bytes in insn */


        enum x86_insn_group group;      /* meta-type, e.g. INSN_EXEC */

        enum x86_insn_type type;        /* type, e.g. INSN_BRANCH */
       x86_invariant_op_t operands[3]; /* use same ordering as x86_insn_t */

} x86_invariant_t;

EXAMPLES

The following sample callback outputs instructions in raw syntax:

/*
 * Sample DISASM_CALLBACK: format 'insn' in raw syntax and print it on a
 * line of its own.
 *
 *   insn  instruction to print
 *   arg   caller-supplied pointer; not used by this callback
 */
void raw_print( x86_insn_t *insn, void *arg ) {
        char line[1024];

        (void) arg;

        /* sizeof keeps the length passed in step with the buffer declaration */
        x86_format_insn( insn, line, sizeof line, raw_syntax );

        printf( "%s\n", line );
}

The following sample resolver performs very limited checking on the operand of a jump or call to determine what program address the operand refers to:

/*
 * Sample DISASM_RESOLVER: work out which program address the operand of a
 * jump or call refers to, with very limited checking.
 *
 *   op    operand to resolve
 *   insn  instruction the operand belongs to; its addr and size are the
 *         base for relative operands
 *   arg   caller-supplied pointer required by the DISASM_RESOLVER
 *         signature; not used here
 *
 * Returns the resolved address, or -1 if the operand type (or, for a
 * relative operand, its datatype) is not one of those handled below.
 */
long resolver( x86_op_t *op, x86_insn_t *insn, void *arg ) {
        long retval = -1;

        (void) arg;

        /* this is a flat ripoff of internal_resolver in libdis.c --
           we don't do any register or stack resolving, or check
           to see if we have already encountered this RVA */
        if ( op->type == op_absolute || op->type == op_offset ) {
                retval = op->data.sdword;
        } else if ( op->type == op_relative_near ||
                    op->type == op_relative_far ) {
                /* relative operands are offsets from the end of this insn */
                if ( op->datatype == op_byte ) {
                        retval = insn->addr + insn->size + op->data.sbyte;
                } else if ( op->datatype == op_word ) {
                        retval = insn->addr + insn->size + op->data.sword;
                } else if ( op->datatype == op_dword ) {
                        retval = insn->addr + insn->size + op->data.sdword;
                }
        }

        return( retval );
}

The following code snippets demonstrate how to use the various disassembly routines:


        unsigned char *buf;  /* buffer of bytes to disassemble */

        unsigned int buf_len;/* length of buffer */

        unsigned long rva;   /* load address of start of buffer */

        unsigned int pos;    /* position in buffer */

        x86_insn_t insn;     /* disassembled instruction */


        /* disassemble entire buffer, printing automatically */

        x86_disasm_range( buf, buf_rva, pos, buf_len,

                          raw_print, NULL );


        /* disassemble a single instruction, then print it */

        if (x86_disasm( buf, buf_len, buf_rva, pos, &insn ) ) {

                raw_print( &insn, NULL );

        }


        /* disassemble forward in 'buf' starting at 'pos' */

        x86_disasm_forward( buf, buf_len, buf_rva, pos,

                            raw_print, NULL, resolver );