/// Object-oriented wrapper of Capstone disassembly engine
module capstone.capstone;

import std.typecons: Tuple, BitFlags, Yes, Nullable;
import std.format: format;
import std.conv: to;
import std.array: array, appender;
import std.range: isInputRange, enumerate, front;
import std.algorithm: canFind;
import std.traits: EnumMembers;

import capstone.api;
import capstone.instruction;
import capstone.internal;
import capstone.error;

/** Encapsulates an instance of the Capstone disassembly engine

This class encapsulates the core functionality of the Capstone disassembly engine, providing
access to runtime options for
$(UL
    $(LI changing the `Mode` of interpretation)
    $(LI changing the `Syntax` of the disassembly)
    $(LI choosing whether `Instruction`'s should be disassembled in detail, i.e. filling `Instruction.detail`)
    $(LI defining manual handling of broken instructions through the $(LINK2 http://www.capstone-engine.org/skipdata.html, SKIPDATA) mode of operation (optionally via a `Callback`))
)

Note that, since the architecture is chosen at runtime, this base class only provides access to the architecture-independent aspects,
e.g. disasm returns `Instruction`s instead of `X86Instruction`s.
However, if necessary, it can be cast to the architecture-specific variant, such as `CapstoneX86`.
*/
abstract class Capstone{
    package {
        alias Handle = size_t;
        Handle handle;              // Engine handle filled in by `cs_open`; 0 while no engine is open

        // Cached copies of the option values last applied through `cs_option`
        ModeFlags _mode;
        Syntax _syntax;
        bool _detail;
        bool _skipData;

        // SKIPDATA configuration. Both are kept as fields because their addresses are handed to the engine
        // in `setupSkipdata` and therefore have to stay referenced from the D side.
        string mnemonic;
        Callback callback;

        // Custom mnemonics currently registered, keyed by instruction id
        string[int] customMnemonics;
    }
    const Arch arch; /// The architecture this Capstone instance is set up for

    /** Constructs an instance of the disassembly engine

    Params:
        arch = The architecture the engine will be created for
        modeFlags = A combination of flags to further specify how bytes will be interpreted, e.g. in little-endian.

    Throws: `CapstoneException` if the versions of library and bindings differ, or if `cs_open` reports an error
    */
    private this(in Arch arch, in ModeFlags modeFlags){
        const libVer = versionOfLibrary;
        const bindVer = versionOfBindings;
        if(libVer != bindVer)
            throw new CapstoneException("API version mismatch between library (%s) and bindings (%s)".format(libVer, bindVer), ErrorCode.UnsupportedVersion);

        // Create Capstone engine instance
        this.arch = arch;
        this._mode = modeFlags;
        cs_open(arch, modeFlags.to!uint, &handle).checkErrno;
    }

    /// Closes the engine instance, if one was opened
    ~this(){
        // NOTE(review): `checkErrno` may throw. Throwing (and thereby allocating) inside a destructor that is run
        // by the garbage collector is problematic in D - confirm whether errors should rather be ignored here.
        if(handle)
            cs_close(&handle).checkErrno;
    }

    /// Gets the mode of interpretation
    @property auto mode() const {return _mode;}
    /// Sets the mode of interpretation
    @property void mode(in ModeFlags modeFlags){
        // Only update the cached value once the engine accepted the option, so both cannot get out of sync
        cs_option(handle, cs_opt_type.CS_OPT_MODE, modeFlags.to!uint).checkErrno;
        _mode = modeFlags;
    }

    /// Gets the disassembly syntax variant
    @property auto syntax() const {return _syntax;}
    /// Sets the disassembly syntax variant
    @property void syntax(in Syntax option){
        cs_option(handle, cs_opt_type.CS_OPT_SYNTAX, option).checkErrno;
        _syntax = option;
    }

    /// Indicates whether instructions will be disassembled in detail
    @property auto detail() const {return _detail;}
    /// Sets whether instructions will be disassembled in detail
    @property void detail(in bool enable){
        auto option = enable ? cs_opt_value.CS_OPT_ON : cs_opt_value.CS_OPT_OFF;
        cs_option(handle, cs_opt_type.CS_OPT_DETAIL, option).checkErrno;
        _detail = enable;
    }

    /// Indicates whether SKIPDATA mode of operation is in use
    @property auto skipData() const {return _skipData;}
    /// Sets whether to use SKIPDATA mode of operation
    @property void skipData(in bool enable){
        auto option = enable ? cs_opt_value.CS_OPT_ON : cs_opt_value.CS_OPT_OFF;
        cs_option(handle, cs_opt_type.CS_OPT_SKIPDATA, option).checkErrno;
        _skipData = enable;
    }

    /** Customises behaviour in SKIPDATA mode of operation

    By default, disassembling will stop when it encounters a broken instruction.
    Most of the time, the reason is that this is data mixed inside the input.

    When in SKIPDATA mode, some (unknown) amount of data until the next interpretable instruction will be skipped.
    Capstone considers the skipped data a special instruction with ID 0x00 and a `mnemonic` that defaults to `".byte"`.
    The operand string is a hex-code of the sequence of bytes it skipped.

    By default, for each iteration, Capstone skips 1 byte on X86 architecture, 2 bytes on Thumb mode on Arm
    architecture, and 4 bytes for the rest. The reason why Capstone skips 1 byte on X86 is that X86 puts no
    restriction on instruction alignment, but other architectures enforce some requirements on this aspect.

    To customise how many bytes to skip when encountering data, a `Callback` delegate can optionally be set up
    to return the corresponding number.

    Params:
        mnemonic = The mnemonic to use for representing skipped data
        callback = The optional callback to use for handling bytes that cannot be interpreted as an instruction.

    Throws: `CapstoneException` if `mnemonic` is `null` or if the engine rejects the option

    Example:
    ---
    // Custom data that can be referred to in a callback delegate
    struct CallbackData{
        int bytesToSkip;
    }
    auto myData = CallbackData(1);

    size_t myCallback(in ubyte[] code, size_t offset) {
        return myData.bytesToSkip++; // Always skip one more byte when encountering data
    }

    cs.skipData = true;                     // Enable skipdata mode
    cs.setupSkipdata("db", &myCallback);    // Use custom callback, and "db" as custom mnemonic for data
    ---
    */
    void setupSkipdata(in string mnemonic = ".byte", Callback callback = null){
        import std.string: toStringz;

        if(!mnemonic)
            throw new CapstoneException("Invalid mnemonic", ErrorCode.InvalidOption);

        // D strings are only guaranteed to be NUL-terminated if they are literals, but the C API expects a
        // C string. Create a terminated copy and keep a slice of it in the field, so the buffer whose address
        // is passed to the engine stays referenced from the D side.
        immutable(char)* terminated = mnemonic.toStringz;
        this.mnemonic = terminated[0 .. mnemonic.length];
        this.callback = callback;

        auto setup = cs_opt_skipdata(this.mnemonic.ptr, this.callback ? &cCallback : null, &this.callback);
        cs_option(handle, cs_opt_type.CS_OPT_SKIPDATA_SETUP, cast(size_t)&setup).checkErrno;
    }

    /** Defines a custom mnemonic for a specified instruction id

    Params:
        id = The id of the instruction to customise
        mnem = The mnemonic to use from now on; an empty (or `null`) value resets the instruction to its default mnemonic
    */
    private void customMnemonic(in int id, in string mnem = null) {
        import std.string: toStringz;

        auto optMnem = cs_opt_mnem(id, null);
        string stored;
        if(mnem != null){
            // The C API expects a NUL-terminated string, which an arbitrary D string is not guaranteed to be.
            // `stored` is a slice of the terminated copy and thus keeps that buffer referenced.
            immutable(char)* terminated = mnem.toStringz;
            stored = terminated[0 .. mnem.length];
            optMnem.mnemonic = terminated;
        }
        cs_option(handle, cs_opt_type.CS_OPT_MNEMONIC, cast(size_t)&optMnem).checkErrno;

        // Only update the bookkeeping once the engine accepted the option
        if(stored.length)
            customMnemonics[id] = stored;
        else
            customMnemonics.remove(id);
    }

    /** Disassemble binary code, given the code buffer, start address and number of instructions to be decoded

    Params:
        code = Buffer containing raw binary code to be disassembled
        address = Address of the first instruction in given raw code buffer
        count = Number of instructions to be disassembled, or 0 to get all of them
    Returns: The successfully disassembled instructions

    Example:
    ---
    auto CODE = cast(ubyte[])"\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00\x00\x91\x92";
    auto cs = new CapstoneX86(ModeFlags(Mode.bit32)); // Initialise x86 32bit engine
    auto res = cs.disasm(CODE, 0x1000);               // Disassemble, offsetting addresses by 0x1000
    assert("%s %s".format(res[0].mnemonic, res[0].opStr) == "lea ecx, [edx + esi + 8]");
    assert("%s %s".format(res[1].mnemonic, res[1].opStr) == "add eax, ebx");
    assert("%s %s".format(res[2].mnemonic, res[2].opStr) == "add esi, 0x1234");
    ---
    */
    abstract const(Instruction)[] disasm(in ubyte[] code, in ulong address, in size_t count = 0) const;

    /** Provides a range to iteratively disassemble binary code - one instruction at a time

    Fast API to disassemble binary code, given the code buffer and start address.
    Provides access to only one disassembled instruction at a time, resulting in a smaller memory footprint.
    Params:
        code = Buffer containing raw binary code to be disassembled
        address = Address of the first instruction in given raw code buffer
    Returns: An input range over the disassembled instructions
    Example:
    ---
    auto CODE = cast(ubyte[])"\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00\x00\x91\x92";
    auto cs = new CapstoneX86(ModeFlags(Mode.bit32)); // Initialise x86 32bit engine
    auto range = cs.disasmIter(CODE, 0x1000);         // Disassemble one instruction at a time, offsetting addresses by 0x1000
    assert("%s %s".format(range.front.mnemonic, range.front.opStr) == "lea ecx, [edx + esi + 8]");
    range.popFront;
    assert("%s %s".format(range.front.mnemonic, range.front.opStr) == "add eax, ebx");
    range.popFront;
    assert("%s %s".format(range.front.mnemonic, range.front.opStr) == "add esi, 0x1234");
    range.popFront;
    assert(range.empty);
    ---
    */
    abstract InstructionRange disasmIter(in ubyte[] code, in ulong address) const;
}

// TODO: Try switching to InputRange!Instruction (more restrictive than isInputRange, though)
/// An input range that provides access to one disassembled `Instruction` at a time
abstract class InstructionRange {
    /// Retrieves element of the range
    Instruction front();
    /// True if range has no instructions, i.e. cannot be advanced anymore
    bool empty();
    /// Drops the front instruction and advances the range
    void popFront();
}
static assert(isInputRange!InstructionRange);

unittest{ // disasm
    auto CODE = cast(ubyte[])"\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00\x00\x91\x92";
    auto cs = create(Arch.x86, ModeFlags(Mode.bit16));
    cs.mode = ModeFlags(Mode.bit32);

    auto res = cs.disasm(CODE, 0x1000);
    assert(res.length == 3); // With skipdata disabled, disassembling will halt when encountering data
    assert("%s %s".format(res[0].mnemonic, res[0].opStr) == "lea ecx, [edx + esi + 8]");
    assert("%s %s".format(res[1].mnemonic, res[1].opStr) == "add eax, ebx");
    assert("%s %s".format(res[2].mnemonic, res[2].opStr) == "add esi, 0x1234");
    cs.skipData = true;
    res = cs.disasm(CODE, 0x1000, 5);
    assert(res.length == 5);
}

/** Class template that encapsulates an architecture-specific instance of the Capstone disassembly engine

Note that all architecture-specific instances, like `CapstoneX86`, instantiate and derive from this one.

Params:
    TInstructionId = The architecture-specific instruction identifier type
    TInstruction = The architecture-specific instruction type
*/
abstract class CapstoneImpl(TInstructionId, TInstruction) : Capstone {
    import capstone.range: InstructionImplRange;

    /** Creates an architecture-specific instance with a given mode of interpretation

    Params:
        arch = The architecture the engine will be created for
        modeFlags = The (initial) mode of interpretation, which can still be changed later on
    */
    package this(in Arch arch, in ModeFlags modeFlags){
        super(arch, modeFlags);
    }

    /// Eagerly collects the instructions produced by `disasmIter`, stopping after `count` of them if `count` is non-zero
    override TInstruction[] disasm(in ubyte[] code, in ulong address, in size_t count = 0) const {
        auto instrAppnd = appender!(TInstruction[]);
        foreach(i, instr; disasmIter(code, address).enumerate){
            instrAppnd.put(instr);
            // `i+1` is never 0, so a `count` of 0 never triggers and everything gets disassembled
            if(i+1==count)
                break;
        }
        return instrAppnd.data;
    }

    /// Creates the architecture-specific range that disassembles `code` one instruction at a time
    override InstructionImplRange!TInstruction disasmIter(in ubyte[] code, in ulong address) const {
        return new InstructionImplRange!TInstruction(this, code, address);
    }

    /** Defines a custom mnemonic for a specified instruction id

    Params:
        id = The architecture-specific id of the instruction to customise
        mnem = The mnemonic to use from now on; omit it to reset the instruction to its default mnemonic

    Example:
    ---
    enum X86_CODE32 = cast(ubyte[])"\x75\x01";
    auto cs = new CapstoneX86(ModeFlags(Mode.bit32));

    // Customize mnemonic JNE to JNZ
    cs.customMnemonic(X86InstructionId.jne, "jnz");

    foreach(instr; cs.disasm(X86_CODE32, 0x1000))
        writefln("%s\t%s", instr.mnemonic, instr.opStr);

    // Reset engine to use the default mnemonic of JNE
    cs.customMnemonic(X86InstructionId.jne);

    foreach(instr; cs.disasm(X86_CODE32, 0x1000))
        writefln("%s\t%s", instr.mnemonic, instr.opStr);
    ---
    */
    void customMnemonic(in TInstructionId id, in string mnem = null) {
        super.customMnemonic(id, mnem);
    }
}