diff --git a/2.function_discovery_and_renaming.py b/2.function_discovery_and_renaming.py
index cdf9270..120f6ee 100644
--- a/2.function_discovery_and_renaming.py
+++ b/2.function_discovery_and_renaming.py
@@ -236,6 +236,7 @@ def _get_func_name_ea(self, idx):
     def enumerate_functions(self):
         for idx in range(self.nfunctab):
             func_addr = self._get_func_addr(idx)
+            idaapi.add_func(func_addr)
             func_name_ea = self._get_func_name_ea(idx)
             try:
                 func_name = ida_bytes.get_strlit_contents(func_name_ea, -1, STRTYPE_C)
@@ -283,10 +284,57 @@ def parse_pcln(start_ea):
     else:
         return GoPclnTab12(start_ea)
 
+
+def is_qword(addr):
+    """
+    Returns True when the item at addr is exactly 8 bytes long.
+
+    NOTE(review): only the item size is tested, so any 8-byte item matches,
+    not just qword data - confirm that this is intended.
+    """
+    return idaapi.get_item_size(addr) == 8
+
+
+def undefine_qword_array(start_ea, length):
+    """ Undefine the qword array starting at 'start_ea' for 'length' qwords. """
+    for i in range(length):
+        idaapi.del_items(start_ea + i * 8, idaapi.DELIT_SIMPLE)
+
+
+def detect_and_undefine_qword_arrays():
+    """ Undefine every run of consecutive 8-byte items found in any segment. """
+    for index in range(ida_segment.get_segm_qty()):
+        seg = ida_segment.getnseg(index)
+        if not seg:
+            continue
+
+        seg_end = seg.end_ea
+        current_ea = seg.start_ea
+        while current_ea < seg_end:
+            if not is_qword(current_ea):
+                # Not an 8-byte item: try the next address
+                current_ea += 1
+                continue
+
+            # Measure the run, never looking past the end of the segment
+            length = 1
+            while (current_ea + length * 8 < seg_end
+                   and is_qword(current_ea + length * 8)):
+                length += 1
+
+            undefine_qword_array(current_ea, length)
+
+            # Continue right after the run that was just undefined
+            current_ea += length * 8
+
+
 def renamer_init():
+    # Clear 8-byte items before any renaming is attempted
+    detect_and_undefine_qword_arrays()
+
     renamed = 0
 
-    gopclntab = get_gopclntab_seg()
+    gopclntab = get_gopclntab_seg()
     # if goplcntab is None:
     #     add my code here
     if gopclntab is not None:
@@ -337,13 +385,13 @@ def pointer_renamer():
         # Look at data xrefs to the function - find the pointer that is located in .rodata
         data_ref = idaapi.get_first_dref_to(addr)
         while data_ref != BADADDR:
-            if 'rodata' in get_segm_name(data_ref):
-                # Only rename things that are currently listed as an offset; eg. off_9120B0
-                if 'off_' in ida_name.get_ea_name(data_ref):
-                    if idc.set_name(data_ref, ('ptr_%s' % name)):
-                        renamed += 1
-                    else:
-                        error('error attempting to name pointer @ 0x%02x for %s' % (data_ref, name))
+            # No 'rodata' segment-name check here: it may not hold true if it's dumped from memory
+            # Only rename things that are currently listed as an offset; eg. off_9120B0
+            if 'off_' in ida_name.get_ea_name(data_ref):
+                if idc.set_name(data_ref, ('ptr_%s' % name)):
+                    renamed += 1
+                else:
+                    error('error attempting to name pointer @ 0x%02x for %s' % (data_ref, name))
 
             data_ref = idaapi.get_next_dref_to(addr, data_ref)
 
@@ -364,4 +412,4 @@ def main():
     info('Found and successfully renamed %d function pointers!' % pointers_renamed)
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/5.extract_types.py b/5.extract_types.py
index be08be0..a01e36b 100644
--- a/5.extract_types.py
+++ b/5.extract_types.py
@@ -57,8 +57,12 @@
     __int64 mhdr;
 };
 """
+# Set to True to make print_debug_msg() print its messages
+DEBUG = False
+# Address cached by get_data_addr(); -1 until it has been located
+cache_data_addr = -1
 
-def find_type_structures(func_name, search_len=15):
+def find_type_structures(func_name, valid_register, search_len=15):
     """
     Looks for all types passed as argument to the given function. Probably only works for Go > 1.15 where
     the register calling convention was introduced.
@@ -71,66 +75,248 @@ def find_type_structures(func_name, search_len=15):
     for f in Functions():
         if ida_funcs.get_func_name(f) == func_name:
             for ref in XrefsTo(f):
-                # Find the type argument of that function
-                for h in Heads(ref.frm - search_len, ref.frm):
+                # Skip references that are not defined as code
+                if not is_code(idaapi.get_flags(ref.frm)):
+                    continue
+
+                # Walk backwards from the call so that the lea closest to it is found first
+                for h in reversed(list(Heads(ref.frm - search_len, ref.frm))):
                     print(f"Instruction: {hex(h)} - {print_insn_mnem(h)}")
-                    if "lea" == print_insn_mnem(h) and (get_operand_type(h, 1) == o_imm or get_operand_type(h, 1) == o_mem) and (print_operand(h, 0) == "rcx" or print_operand(h, 0) == "rax"):
-                        print("FOUND")
-                        type_addresses.add(get_operand_value(h, 1))
-                        break
-            break
+                    if ("lea" == print_insn_mnem(h)
+                            and get_operand_type(h, 1) in (o_imm, o_mem)
+                            and print_operand(h, 0) == valid_register):
+                        print("FOUND")
+                        type_addresses.add(get_operand_value(h, 1))
+                        break
+            break # No need to loop through other functions since we have found our function
     return type_addresses
 
+
+def is_in_segments(ea):
+    """Returns True when ea lies inside one of the database's segments."""
+    for i in range(ida_segment.get_segm_qty()):
+        seg = ida_segment.getnseg(i)
+        if seg is not None and seg.start_ea <= ea < seg.end_ea:
+            return True
+    return False
+
+
+def print_debug_msg(msg):
+    """Prints msg only when the module-level DEBUG flag is set."""
+    if DEBUG:
+        print(msg)
+
+
+def undefine_range(start_addr, size):
+    """Undefines the item at each of the size addresses starting at start_addr."""
+    for offset in range(size):
+        ida_bytes.del_items(start_addr + offset)
+
+
+def define_qword(start_addr, num_qword):
+    """Creates num_qword consecutive 8-byte data items starting at start_addr."""
+    for idx in range(num_qword):
+        create_data(start_addr + idx * 8, FF_QWORD, 8, BADADDR)
+
+
+def get_struct_variable_name(addr):
+    """
+    Reads the length-prefixed name stored at addr.
+
+    The byte at addr is skipped because its meaning is not established here; the
+    byte at addr+1 is used as the length of the string that starts at addr+2.
+    Callers must be prepared for a None result.
+    """
+    variable_name_len = ida_bytes.get_byte(addr + 1)
+    return ida_bytes.get_strlit_contents(addr + 2, variable_name_len, STRTYPE_C)
+
+
+def parse_struct_variables(start_ea, num_variables):
+    """
+    Comments and resolves the num_variables struct fields stored at start_ea.
+
+    Every field is a record of three qwords: pointer to the field name, the
+    field's type and the field's offset.
+    """
+    undefine_range(start_ea, num_variables * 3 * 0x8)
+    define_qword(start_ea, num_variables * 3)
+
+    for variable_idx in range(num_variables):
+        curr_ea = start_ea + variable_idx * 0x8 * 3
+
+        # Get name and set cmt next to the pointer
+        variable_name = get_struct_variable_name(get_qword(curr_ea))
+        if variable_name is not None:
+            set_cmt(curr_ea, variable_name.decode(errors="replace"), False)
+
+        # Parse the field's type unless it already carries the golang_type structure
+        type_ea = get_qword(curr_ea + 8)
+        if idc.get_type(type_ea) != 'golang_type':
+            print_debug_msg("Parsing type of variable at " + str(hex(curr_ea+8)))
+            parse_type(type_ea)
+
+
+def get_data_addr():
+    """
+    Returns the base address that name offsets are relative to, or -1 if not found.
+
+    Segment names are not relied upon (sections may be tampered with, or the
+    sample may be dumped from memory); every segment is searched for a fixed
+    byte signature instead. A found address is cached in cache_data_addr.
+    """
+    global cache_data_addr
+
+    if cache_data_addr != -1:
+        return cache_data_addr
+
+    _rdata_magic = b"\x00\x00\x01\x01\x41\x01\x01\x42"
+    mask = bytes([0xFF] * len(_rdata_magic))
+
+    for seg_idx in range(ida_segment.get_segm_qty()):
+        seg = ida_segment.getnseg(seg_idx)
+        if seg is None:
+            continue
+
+        found_ea = ida_bytes.bin_search(seg.start_ea, seg.end_ea, _rdata_magic, mask, ida_search.SEARCH_DOWN, 0)
+        if found_ea != idaapi.BADADDR:
+            cache_data_addr = found_ea
+            return found_ea
+
+    print("Could not find .rdata segment!")
+    return -1
+
+
+def parse_struct_with_name(addr):
+    """
+    Parses the fields of a struct type and comments its module name.
+
+    Offsets used, relative to addr:
+      0x38 - pointer to the first field record
+      0x40 - number of fields (parse_member checks it against 0x48)
+      0x50 - offset of the module name, relative to get_data_addr()
+    """
+    variable_size = get_qword(addr + 0x40)
+
+    data_addr = get_data_addr()
+    if data_addr != -1:
+        # Read as a 32-bit offset, like the name offset parse_type() reads at
+        # +0x28, so that the fields following it are not mixed into the value.
+        # NOTE(review): matches Go's abi.UncommonType.PkgPath - confirm.
+        module_name = data_addr + get_wide_dword(addr + 0x50)
+
+        # Check whether module_name is within segments
+        if is_in_segments(module_name):
+            if get_wide_byte(module_name) == 0:
+                name_size = get_wide_byte(module_name + 1)
+                module_name_str = get_strlit_contents(module_name + 2, name_size)
+                if module_name_str is not None:
+                    set_cmt(addr + 0x50, module_name_str.decode(errors="replace"), False)
+            else:
+                print_debug_msg(hex(module_name))
+                print_debug_msg(get_wide_byte(module_name))
+                print_debug_msg("Invalid name")
+
+    undefine_range(addr + 0x40, 0x20)
+    define_qword(addr + 0x40, 0x4)
+
+    parse_struct_variables(get_qword(addr + 0x38), variable_size)
+
+
+def parse_struct_without_name(addr):
+    """Parses the fields of a struct type that has no module name to comment."""
+    variable_size = get_qword(addr + 0x40)
+
+    undefine_range(addr + 0x40, 0x10)
+    define_qword(addr + 0x40, 2)
+
+    parse_struct_variables(get_qword(addr + 0x38), variable_size)
+
+
+def parse_member(addr):
+    """Parses the fields of the type at addr; anything but a struct is skipped."""
+    # The low five bits hold the kind (0x19 is struct); the upper bits are
+    # flags in Go <= 1.23 and must not take part in the comparison
+    if (get_wide_byte(addr + 0x17) & 0x1F) != 0x19:
+        print_debug_msg(str(hex(addr)) + " - Not struct type")
+        return
+
+    # Ensure the two values are equal so we can safely assume member_size
+    if get_qword(addr + 0x40) != get_qword(addr + 0x48):
+        print_debug_msg(str(hex(addr)) + " - Unmatched member size")
+        return
+
+    # Tflags has name - https://github.com/golang/go/blob/release-branch.go1.23/src/internal/abi/type.go#L109
+    if get_wide_byte(addr + 0x14) & 0x4 != 0:
+        parse_struct_with_name(addr)
+    else:
+        parse_struct_without_name(addr)
+
 
 def parse_type(addr):
-    """
-    Applies the correct structure to the type at the given address and locates its name.
-    """
-    SetType(addr, "golang_type")
-    data_addr = -1
-    for s in Segments():
-        if (get_segm_name(s) == ".rdata") or (get_segm_name(s) == "__rodata"):
-            data_addr = get_segm_start(s)
-    if data_addr == -1:
-        print("Could not find .rdata segment!")
-        return False
-
-    # nameOff is an offset into rdata. We end up on a structure where the first byte is a bitfield
-    # followed by the size of the string followed by the name of the type.
-    # https://github.com/golang/go/blob/release-branch.go1.16/src/reflect/type.go#L443
-    nameOff = get_wide_dword(addr + 0x28) + data_addr
-    if nameOff == data_addr:
-        return True # No type string, just move on
-
-    # Starting from Go 1.17 (?), the size is provided as a varint-encoded length.
-    size = get_wide_byte(nameOff + 1) << 8 | get_wide_byte(nameOff + 2)
-    if size > 0xFF: # Quick & dirty sanity check.
-        size = get_wide_byte(nameOff + 1) # This is almost certain to break eventually
-        type_str = get_strlit_contents(nameOff + 2, size)
-    else:
-        type_str = get_strlit_contents(nameOff + 3, size)
-    if not type_str:
-        print(f"Could not obtain type name for {hex(addr)} at address {hex(nameOff)}")
-        del_items(addr) # Was probably a FP, delete the structure and move on
-        return True
-    set_cmt(addr, type_str.decode(errors="replace"), False)
-    for ref in XrefsTo(addr):
-        set_cmt(ref.frm, type_str.decode(errors="replace"), False)
-    # Rename the structure too. 0x800 = SN_FORCE, not available for some reason
-    # See https://hex-rays.com/products/ida/support/idadoc/203.shtml
-    set_name(addr, "type_" + type_str.decode(errors="replace")[:20], SN_NOCHECK | 0x800)
-    return True
+    """
+    Applies the correct structure to the type at the given address and locates its name.
+    For struct types the members are parsed as well.
+    Returns False when the data address cannot be located, True otherwise.
+    """
+    SetType(addr, "golang_type")
+    data_addr = get_data_addr()
+    if data_addr == -1:
+        return False  # get_data_addr() has already printed the reason
+
+    # nameOff is an offset into rdata. We end up on a structure where the first byte is a bitfield
+    # followed by the size of the string followed by the name of the type.
+    # https://github.com/golang/go/blob/release-branch.go1.16/src/reflect/type.go#L443
+    nameOff = get_wide_dword(addr + 0x28) + data_addr
+    if nameOff == data_addr:
+        return True # No type string, just move on
+
+    # Starting from Go 1.17 (?), the size is provided as a varint-encoded length.
+    size = get_wide_byte(nameOff + 1) << 8 | get_wide_byte(nameOff + 2)
+    if size > 0xFF: # Quick & dirty sanity check.
+        size = get_wide_byte(nameOff + 1) # This is almost certain to break eventually
+        type_str = get_strlit_contents(nameOff + 2, size)
+    else:
+        type_str = get_strlit_contents(nameOff + 3, size)
+    if not type_str:
+        print(f"Could not obtain type name for {hex(addr)} at address {hex(nameOff)}")
+        del_items(addr) # Was probably a FP, delete the structure and move on
+        return True
+    set_cmt(addr, type_str.decode(errors="replace"), False)
+    for ref in XrefsTo(addr):
+        set_cmt(ref.frm, type_str.decode(errors="replace"), False)
+    # Rename the structure too. 0x800 = SN_FORCE, not available for some reason
+    # See https://hex-rays.com/products/ida/support/idadoc/203.shtml
+    set_name(addr, "type_" + type_str.decode(errors="replace")[:20], SN_NOCHECK | 0x800)
+    parse_member(addr)
+    return True
 
 # Import the required IDA structures if necessary
 if get_struc_id("golang_type") == BADADDR:
     parse_decls(C_HEADER, idaapi.PT_TYP)
 
 # Find all places in the binary where there is type information
-addresses = find_type_structures("runtime.newobject")
-addresses |= find_type_structures("runtime.makechan", search_len=30)
-addresses |= find_type_structures("runtime.makemap", search_len=30)
-addresses |= find_type_structures("runtime.mapiterinit", search_len=30)
-addresses |= find_type_structures("runtime.makeslice", search_len=30)
+# (function name, register that holds the type address, number of bytes to scan before the call)
+TYPE_ARGUMENT_CALLS = (
+    ("runtime_newobject", "rax", 15),
+    ("runtime_makechan", "rax", 30),
+    ("runtime_makemap", "rax", 30),
+    ("runtime_mapiterinit", "rax", 30),
+    ("runtime_makeslice", "rax", 30),
+    ("runtime_makeslicecopy", "rax", 30),
+    ("encoding_json_Unmarshal", "rdi", 30),
+    ("encoding_json_Marshal", "rax", 30),
+    ("runtime_typedslicecopy", "rax", 30),
+    ("runtime_growslice", "rsi", 30),
+    ("runtime_assertI2I2", "rax", 30),
+    ("runtime_assertI2I", "rax", 30),
+    ("runtime_assertE2I", "rax", 30),
+    ("runtime_assertE2I2", "rax", 30),
+    ("golang_org_x_crypto_ssh_Unmarshal", "rdi", 30),
+    ("runtime_typedmemclr", "rax", 30),
+)
+addresses = set()
+for func_name, register, search_len in TYPE_ARGUMENT_CALLS:
+    addresses |= find_type_structures(func_name, register, search_len=search_len)
 
 # Parse type information
 for t in addresses:
diff --git a/README.md b/README.md
index ca3c7ef..1e6c0c4 100644
--- a/README.md
+++ b/README.md
@@ -55,11 +55,14 @@ The first two steps (recreate_pclntab and function_discovery_and_renaming) will
 
 -
 
 ## Step 5: Extract type information (by Ivan Kwiatkowski)
+
 - extract_types.py
     - Comments the arguments of all calls to `newobject`, `makechan`, etc.
     - Applies the correct C type to these objects and renames them
     - Obtains the human-readable name and adds it as a comment
+    - Locates the data address by byte signature, so section names are not required
+    - Parses struct members and, recursively, each member's type
 
 ### Pending fixes and room for contributions:
 - fix_string_cast.py
diff --git a/docs/images/struct_member.png b/docs/images/struct_member.png
new file mode 100644
index 0000000..43223a3
Binary files /dev/null and b/docs/images/struct_member.png differ