1 /// Object-oriented wrapper of Capstone disassembly engine
2 module capstone.capstone;
3 
4 import std.typecons: Tuple, BitFlags, Yes, Nullable;
5 import std.format: format;
6 import std.conv: to;
7 import std.array: array, appender;
8 import std.range: isInputRange, enumerate, front;
9 import std.algorithm: canFind;
10 import std.traits: EnumMembers;
11 
12 import capstone.api;
13 import capstone.instruction;
14 import capstone.internal;
15 import capstone.error;
16 
/** Encapsulates an instance of the Capstone disassembly engine

This class encapsulates the core functionality of the Capstone disassembly engine, providing
access to runtime options for
$(UL
    $(LI changing the `Mode` of interpretation)
    $(LI changing the `Syntax` of the disassembly)
    $(LI choosing whether `Instruction`'s should be disassembled in detail, i.e. filling `Instruction.detail`)
    $(LI defining manual handling of broken instructions through the $(LINK2 http://www.capstone-engine.org/skipdata.html, SKIPDATA) mode of operation (optionally via a `Callback`))
)

Note that, since the architecture is chosen at runtime, this base class only provides access to the architecture-independent aspects,
e.g. disasm returns `Instruction`s instead of `X86Instruction`s.
However, if necessary, it can be cast to the architecture-specific variant, such as `CapstoneX86`.
*/
abstract class Capstone{
    package {
        alias Handle = size_t;
        Handle handle;      // Engine handle filled in by `cs_open`; released via `cs_close` in the destructor

        // Cached copies of the options last applied to the engine
        ModeFlags _mode;
        Syntax _syntax;
        bool _detail;
        bool _skipData;

        // SKIPDATA setup. Both are stored in the instance because pointers to them
        // (`mnemonic.ptr` and `&callback`) are handed to `cs_option` in `setupSkipdata`.
        string mnemonic;
        Callback callback;

        // Custom mnemonics currently registered, keyed by instruction id
        string[int] customMnemonics;
    }
    const Arch arch; /// The architecture this Capstone instance is set up for

    /** Constructs an instance of the disassembly engine

    Params:
        arch = The architecture the engine will be created for
        modeFlags = A combination of flags to further specify how bytes will be interpreted, e.g. in little-endian.
    Throws: `CapstoneException` with `ErrorCode.UnsupportedVersion` if the library and binding versions differ
    */
    private this(in Arch arch, in ModeFlags modeFlags){
        const libVer = versionOfLibrary;
        const bindVer = versionOfBindings;
        if(libVer != bindVer)
            throw new CapstoneException("API version mismatch between library (%s) and bindings (%s)".format(libVer, bindVer), ErrorCode.UnsupportedVersion);

        // Create Capstone engine instance
        this.arch = arch;
        this._mode = modeFlags;
        cs_open(arch, modeFlags.to!uint, &handle).checkErrno;
    }

    /// Closes the engine handle, if one was opened
    ~this(){
        // NOTE(review): if `checkErrno` throws, this would happen inside a destructor, which is
        // problematic when run by the garbage collector - confirm whether errors should be ignored here.
        if(handle)
           cs_close(&handle).checkErrno;
    }

    /// Gets the mode of interpretation
    @property auto mode() const {return _mode;}
    /// Sets the mode of interpretation
    @property void mode(in ModeFlags modeFlags){
        // Apply the option first, so that a failure reported by `checkErrno` cannot leave
        // the cached value out of sync with the engine
        cs_option(handle, cs_opt_type.CS_OPT_MODE, modeFlags.to!uint).checkErrno;
        _mode = modeFlags;
    }

    /// Gets the disassembly syntax variant
    @property auto syntax() const {return _syntax;}
    /// Sets the disassembly syntax variant
    @property void syntax(in Syntax option){
        // Engine first, cache second (see `mode`)
        cs_option(handle, cs_opt_type.CS_OPT_SYNTAX, option).checkErrno;
        _syntax = option;
    }

    /// Indicates whether instructions will be disassembled in detail
    @property auto detail() const {return _detail;}
    /// Sets whether instructions will be disassembled in detail
    @property void detail(in bool enable){
        auto option = enable ? cs_opt_value.CS_OPT_ON : cs_opt_value.CS_OPT_OFF;
        // Engine first, cache second (see `mode`)
        cs_option(handle, cs_opt_type.CS_OPT_DETAIL, option).checkErrno;
        _detail = enable;
    }

    /// Indicates whether SKIPDATA mode of operation is in use
    @property auto skipData() const {return _skipData;}
    /// Sets whether to use SKIPDATA mode of operation
    @property void skipData(in bool enable){
        auto option = enable ? cs_opt_value.CS_OPT_ON : cs_opt_value.CS_OPT_OFF;
        // Engine first, cache second (see `mode`)
        cs_option(handle, cs_opt_type.CS_OPT_SKIPDATA, option).checkErrno;
        _skipData = enable;
    }

    /** Customises behaviour in SKIPDATA mode of operation

    By default, disassembling will stop when it encounters a broken instruction.
    Most of the time, the reason is that this is data mixed inside the input.

    When in SKIPDATA mode, some (unknown) amount of data until the next interpretable instruction will be skipped.
    Capstone considers the skipped data a special instruction with ID 0x00 and a `mnemonic` that defaults to `".byte"`.
    The operand string is a hex-code of the sequence of bytes it skipped.

    By default, for each iteration, Capstone skips 1 byte on X86 architecture, 2 bytes on Thumb mode on Arm
    architecture, and 4 bytes for the rest. The reason why Capstone skips 1 byte on X86 is that X86 puts no
    restriction on instruction alignment, but other architectures enforce some requirements on this aspect.

    To customise how many bytes to skip when encountering data, a `Callback` delegate can optionally be setup
    to return the corresponding number.

    Params:
        mnemonic = The mnemonic to use for representing skipped data
        callback = The optional callback to use for handling bytes that cannot be interpreted as an instruction.
    Throws: `CapstoneException` with `ErrorCode.InvalidOption` if `mnemonic` is `null`

    Example:
    ---
    // Custom data that can be referred to in a callback delegate
    struct CallbackData{
        int bytesToSkip;
    }
    auto myData = CallbackData(1);

    size_t myCallback(in ubyte[] code, size_t offset) {
        return myData.bytesToSkip++; // Always skip one more byte when encountering data
    }

    cs.skipData = true;                     // Enable skipdata mode
    cs.setupSkipdata("db", &myCallback);    // Use custom callback, and "db" as custom mnemonic for data
    ---
    */
    void setupSkipdata(in string mnemonic = ".byte", Callback callback = null){
        if(!mnemonic)
            throw new CapstoneException("Invalid mnemonic", ErrorCode.InvalidOption);
        // Only string literals are guaranteed to be followed by a NUL in D, but the C side receives a
        // bare pointer. Keep a private copy with a terminator appended; the slice excludes the terminator,
        // so the stored string still compares equal to the argument.
        this.mnemonic = (mnemonic ~ '\0')[0 .. $ - 1];
        this.callback = callback;

        // The setup refers to the instance's own fields, hence they are assigned before this point
        auto setup = cs_opt_skipdata(this.mnemonic.ptr, this.callback ? &cCallback : null, &this.callback);
        cs_option(handle, cs_opt_type.CS_OPT_SKIPDATA_SETUP, cast(size_t)&setup).checkErrno;
    }

    /** Defines a custom mnemonic for a specified instruction id

    Params:
        id = The instruction id to customise
        mnem = The mnemonic to use; when empty or `null`, the custom mnemonic for `id` is removed
    */
    private void customMnemonic(in int id, in string mnem = null) {
        auto optMnem = cs_opt_mnem(id, null);
        if(mnem != null){
            // NUL-terminated copy for the C side (see `setupSkipdata`); the stored slice excludes the terminator
            const stored = (mnem ~ '\0')[0 .. $ - 1];
            customMnemonics[id] = stored;
            optMnem.mnemonic = stored.ptr;
        }else
            customMnemonics.remove(id);
        cs_option(handle, cs_opt_type.CS_OPT_MNEMONIC, cast(size_t)&optMnem).checkErrno;
    }

    /** Disassemble binary code, given the code buffer, start address and number of instructions to be decoded

    Params:
        code    = Buffer containing raw binary code to be disassembled
        address = Address of the first instruction in given raw code buffer
        count   = Number of instructions to be disassembled, or 0 to get all of them
    Returns: The successfully disassembled instructions

    Example:
    ---
    auto CODE = cast(ubyte[])"\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00\x00\x91\x92";
    auto cs = new CapstoneX86(ModeFlags(Mode.bit32)); // Initialise x86 32bit engine
    auto res = cs.disasm(CODE, 0x1000);               // Disassemble, offsetting addresses by 0x1000
    assert("%s %s".format(res[0].mnemonic, res[0].opStr) == "lea ecx, dword ptr [edx + esi + 8]");
    assert("%s %s".format(res[1].mnemonic, res[1].opStr) == "add eax, ebx");
    assert("%s %s".format(res[2].mnemonic, res[2].opStr) == "add esi, 0x1234");
    ---
    */
    abstract const(Instruction)[] disasm(in ubyte[] code, in ulong address, in size_t count = 0) const;

    /** Provides a range to iteratively disassemble binary code - one instruction at a time

    Fast API to disassemble binary code, given the code buffer and start address.
    Provides access to only one disassembled instruction at a time, resulting in a smaller memory footprint.
    Params:
        code    = Buffer containing raw binary code to be disassembled
        address = Address of the first instruction in given raw code buffer
    Returns: An input range over the disassembled instructions
    Example:
    ---
    auto CODE = cast(ubyte[])"\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00\x00\x91\x92";
    auto cs = new CapstoneX86(ModeFlags(Mode.bit32)); // Initialise x86 32bit engine
    auto range = cs.disasmIter(CODE, 0x1000);         // Disassemble one instruction at a time, offsetting addresses by 0x1000
    assert("%s %s".format(range.front.mnemonic, range.front.opStr) == "lea ecx, dword ptr [edx + esi + 8]");
    range.popFront;
    assert("%s %s".format(range.front.mnemonic, range.front.opStr) == "add eax, ebx");
    range.popFront;
    assert("%s %s".format(range.front.mnemonic, range.front.opStr) == "add esi, 0x1234");
    range.popFront;
    assert(range.empty);
    ---
    */
    abstract InstructionRange disasmIter(in ubyte[] code, in ulong address) const;
}
207 
// TODO: Try switching to InputRange!Instruction (more restrictive than isInputRange, though)
/** Base class of the ranges that yield one disassembled `Instruction` at a time

Declares the three input range primitives; the `static assert` below verifies that the
class satisfies `isInputRange`.
*/
abstract class InstructionRange {
    /// Whether the range is exhausted, i.e. there is no instruction left to retrieve
    bool empty();
    /// The instruction currently at the head of the range
    Instruction front();
    /// Discards the current instruction and moves on to the next one
    void popFront();
}
static assert(isInputRange!InstructionRange);
219 
unittest{ // disasm
    // Three x86 instructions, followed by bytes that are not disassembled without skipdata
    auto code = cast(ubyte[])"\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00\x00\x91\x92";
    const string[] expected = [
        "lea ecx, [edx + esi + 8]",
        "add eax, ebx",
        "add esi, 0x1234",
    ];

    // Start out in 16-bit mode and switch to 32-bit before disassembling
    auto engine = create(Arch.x86, ModeFlags(Mode.bit16));
    engine.mode = ModeFlags(Mode.bit32);

    auto instrs = engine.disasm(code, 0x1000);
    // With skipdata disabled, disassembling will halt when encountering data
    assert(instrs.length == expected.length);
    foreach(i, text; expected)
        assert("%s %s".format(instrs[i].mnemonic, instrs[i].opStr) == text);

    // With skipdata enabled, the requested five instructions are returned
    engine.skipData = true;
    instrs = engine.disasm(code, 0x1000, 5);
    assert(instrs.length == 5);
}
234 
/** Class template for the architecture-specific flavours of the Capstone disassembly engine

Every architecture-specific engine, like `CapstoneX86`, is an instantiation of (and derives from) this template.

Params:
    TInstructionId = The architecture-specific instruction identifier type
    TInstruction = The architecture-specific instruction type
*/
abstract class CapstoneImpl(TInstructionId, TInstruction) : Capstone {
    import capstone.range: InstructionImplRange;

    /** Creates an architecture-specific instance with a given mode of interpretation

    Params:
        arch = The architecture the engine will be created for
        modeFlags = The (initial) mode of interpretation, which can still be changed later on
    */
    package this(in Arch arch, in ModeFlags modeFlags){
        super(arch, modeFlags);
    }

    /// Collects the instructions produced by `disasmIter`, stopping once `count` of them were gathered (0 = no limit)
    override TInstruction[] disasm(in ubyte[] code, in ulong address, in size_t count = 0) const {
        auto collected = appender!(TInstruction[]);
        auto range = disasmIter(code, address);
        size_t gathered = 0;
        while(!range.empty){
            collected.put(range.front);
            // A `count` of 0 is never reached by the counter, hence everything gets disassembled
            if(++gathered == count)
                break;
            range.popFront;
        }
        return collected.data;
    }

    /// Creates the architecture-specific range that disassembles `code` one instruction at a time
    override InstructionImplRange!TInstruction disasmIter(in ubyte[] code, in ulong address) const {
        auto range = new InstructionImplRange!TInstruction(this, code, address);
        return range;
    }

    /** Defines a custom mnemonic for a specified instruction id

    Forwards to the untyped implementation in `Capstone`; omitting `mnem` resets the mnemonic.

    Example:
    ---
    enum X86_CODE32 = cast(ubyte[])"\x75\x01";
    auto cs = new CapstoneX86(ModeFlags(Mode.bit32));

    // Customize mnemonic JNE to JNZ
    cs.customMnemonic(X86InstructionId.jne, "jnz");

    foreach(instr; cs.disasm(X86_CODE32, 0x1000))
        writefln("%s\t%s", instr.mnemonic, instr.opStr);

    // Reset engine to use the default mnemonic of JNE
    cs.customMnemonic(X86InstructionId.jne);

    foreach(instr; cs.disasm(X86_CODE32, 0x1000))
        writefln("%s\t%s", instr.mnemonic, instr.opStr);
    ---
    */
    void customMnemonic(in TInstructionId id, in string mnem = null) {
        super.customMnemonic(id, mnem);
    }
}