"""This module provides functionalities for parsing ELF files compatible with 32-bit architectures.
It focuses on extracting structure members and variable information from DWARF debugging information.
"""
import logging
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
from pyx2cscope.parser.elf_parser import ElfParser, VariableInfo
[docs]
class Elf32Parser(ElfParser):
    """Class for parsing ELF files compatible with 32-bit architectures.

    Walks the DWARF debugging information of one ELF file and records a
    ``VariableInfo`` entry per variable (structures are expanded into one
    entry per member) in ``variable_map``, keyed by the variable name.
    """
def __init__(self, elf_path):
    """Initialize the Elf32Parser with the given ELF file path.

    Opens the ELF file, reads its DWARF information and preloads the symbol
    table. ``variable_map`` stays empty until ``_map_variables`` is called.

    Args:
        elf_path: Path of the ELF file to parse.

    Raises:
        Exception: Propagated from ``_load_elf_file`` when the file cannot
            be read.
    """
    self.elf_path = elf_path
    self.variable_map = {}
    self.symbol_table = {}  # Filled by _load_symbol_table(): name -> st_value.
    # Scratch state describing the variable DIE currently being processed.
    self.address = None
    self.var_name = None
    self.die_variable = None
    # Declared up front so close_elf_file() can always read the attribute,
    # even when _load_elf_file() fails before a stream has been assigned.
    self.stream = None
    self.elf_file = None
    self.dwarf_info = None
    self._load_elf_file()
    self._load_symbol_table()  # Load symbol table entries into a dictionary
[docs]
def _load_elf_file(self):
    """Open the ELF file and load its DWARF information.

    On success ``self.stream``, ``self.elf_file`` and ``self.dwarf_info`` are
    set. The stream is deliberately left open on the instance;
    ``close_elf_file`` releases it.

    Raises:
        Exception: If an ``IOError`` occurs while opening or reading the
            file; the original error is attached as ``__cause__``.
            Any other error raised while parsing is propagated unchanged.
    """
    self.stream = None
    try:
        self.stream = open(self.elf_path, "rb")
        self.elf_file = ELFFile(self.stream)
        self.dwarf_info = self.elf_file.get_dwarf_info()
    except Exception as err:
        # Do not leak the handle when parsing fails after a successful open.
        if self.stream is not None:
            self.stream.close()
            self.stream = None
        if isinstance(err, IOError):
            raise Exception(f"Error loading ELF file: {self.elf_path}") from err
        raise
[docs]
def close_elf_file(self):
    """Close the stream of the ELF file, if one is currently held."""
    stream = self.stream
    if not stream:
        return
    stream.close()
[docs]
def _map_variables(self) -> dict[str, VariableInfo]:
    """Rebuild ``variable_map`` from every ``DW_TAG_variable`` DIE.

    The map is emptied first, then each variable DIE of each compilation
    unit is handed to ``_process_variable_die``.

    Returns:
        The (shared) ``variable_map`` dictionary of this parser.
    """
    self.variable_map.clear()
    for compilation_unit in self.dwarf_info.iter_CUs():
        for die in compilation_unit.iter_DIEs():
            if die.tag == "DW_TAG_variable":
                self._process_variable_die(die)
    return self.variable_map
[docs]
def _process_variable_die(self, die_variable):
    """Process an individual variable DIE.

    Selects the DIE that carries the variable's name and type, stores it in
    ``self.die_variable`` / ``self.var_name``, resolves the underlying type
    with ``_get_end_die`` and passes it to ``_processing_end_die``.

    A DIE is skipped (nothing is recorded) when:
      * its ``DW_AT_specification`` target is not a ``DW_TAG_variable`` or
        has no ``DW_AT_name``;
      * it has no ``DW_AT_name``, or neither ``DW_AT_location`` nor
        ``DW_AT_external``;
      * the selected DIE has no ``DW_AT_type`` or the type chain cannot be
        resolved.

    Args:
        die_variable: A ``DW_TAG_variable`` DIE.
    """
    attributes = die_variable.attributes
    if "DW_AT_specification" in attributes:
        # Name and type come from the referenced DIE, while the address is
        # extracted from the DIE passed in.
        spec_ref_addr = (
            attributes["DW_AT_specification"].value + die_variable.cu.cu_offset
        )
        spec_die = self.dwarf_info.get_DIE_from_refaddr(spec_ref_addr)
        if spec_die.tag != "DW_TAG_variable":
            return
        name_attr = spec_die.attributes.get("DW_AT_name")
        if name_attr is None:
            # A nameless target cannot be keyed in variable_map; skip it
            # instead of failing on a missing attribute.
            return
        self.die_variable = spec_die
    else:
        name_attr = attributes.get("DW_AT_name")
        if name_attr is None:
            return
        if not (
            attributes.get("DW_AT_location") or attributes.get("DW_AT_external")
        ):
            return
        self.die_variable = die_variable

    self.var_name = name_attr.value.decode("utf-8")
    self._extract_address(die_variable)

    type_attr = self.die_variable.attributes.get("DW_AT_type")
    if type_attr is None:
        return
    ref_addr = type_attr.value + self.die_variable.cu.cu_offset
    type_die = self.dwarf_info.get_DIE_from_refaddr(ref_addr)

    # Volatile and non-volatile types are resolved the same way; only the
    # warning text differs.
    end_die = self._get_end_die(type_die)
    if end_die is None:
        if type_die.tag == "DW_TAG_volatile_type":
            logging.warning(
                f"Skipping volatile type variable {self.var_name} due to missing end DIE"
            )
        else:
            logging.warning(
                f"Skipping variable {self.var_name} due to missing end DIE"
            )
        return
    self._processing_end_die(end_die)
[docs]
def _get_end_die(self, current_die):
    """Follow ``DW_AT_type`` references until a terminal type DIE is reached.

    Terminal types are base, pointer, structure and array types.

    Args:
        current_die: DIE at which the walk starts.

    Returns:
        The first DIE on the chain with a terminal tag, or ``None`` (after
        logging a warning) when a DIE on the way has no ``DW_AT_type``.
    """
    terminal_tags = frozenset(
        (
            "DW_TAG_base_type",
            "DW_TAG_pointer_type",
            "DW_TAG_structure_type",
            "DW_TAG_array_type",
        )
    )
    die = current_die
    while True:
        if die.tag in terminal_tags:
            return die
        if "DW_AT_type" not in die.attributes:
            logging.warning(
                f"Skipping DIE at offset {die.offset} with no 'DW_AT_type' attribute"
            )
            return None
        # The attribute value is added to the CU offset before the lookup.
        target = die.attributes["DW_AT_type"].value + die.cu.cu_offset
        die = self.dwarf_info.get_DIE_from_refaddr(target)
[docs]
def _processing_end_die(self, end_die):
    """Record the current variable according to the tag of its end DIE.

    The address is (re-)extracted from ``self.die_variable`` first. When no
    address is available and the variable is not marked ``DW_AT_external``
    nothing is recorded. Otherwise pointer, structure and array types go to
    their dedicated handlers and everything else to ``_process_base_type``.

    Args:
        end_die: Terminal type DIE of the current variable.
    """
    self._extract_address(self.die_variable)
    if not (
        self.address is not None
        or self.die_variable.attributes.get("DW_AT_external")
    ):
        return

    handlers = {
        "DW_TAG_pointer_type": self._process_pointer_type,
        "DW_TAG_structure_type": self._process_structure_type,
        "DW_TAG_array_type": self._process_array_type,
    }
    handle = handlers.get(end_die.tag, self._process_base_type)
    handle(end_die)
[docs]
def _load_symbol_table(self):
    """Fill ``symbol_table`` with the ``STT_OBJECT`` symbols of the ELF file.

    Every symbol-table section is scanned; each object symbol is stored as
    ``name -> st_value``. A name seen again later overwrites the earlier
    entry.
    """
    for section in self.elf_file.iter_sections():
        if not isinstance(section, SymbolTableSection):
            continue
        self.symbol_table.update(
            (symbol.name, symbol["st_value"])
            for symbol in section.iter_symbols()
            if symbol["st_info"].type == "STT_OBJECT"
        )
[docs]
def _fetch_address_from_symtab(self, variable_name):
    """Look up a variable in the preloaded symbol table.

    Args:
        variable_name: Symbol name to look up.

    Returns:
        The value stored for the symbol, or ``None`` when it is unknown.
    """
    try:
        return self.symbol_table[variable_name]
    except KeyError:
        return None
[docs]
def _find_actual_declaration(self, die_variable):
    """Follow ``DW_AT_specification`` links to the DIE they finally point at.

    Args:
        die_variable: DIE at which the walk starts.

    Returns:
        The first DIE on the chain without a ``DW_AT_specification``
        attribute (``die_variable`` itself when it has none).
    """
    current = die_variable
    while "DW_AT_specification" in current.attributes:
        specification = current.attributes["DW_AT_specification"]
        target = specification.value + current.cu.cu_offset
        current = self.dwarf_info.get_DIE_from_refaddr(target)
    return current
[docs]
def _process_pointer_type(self, end_die):
    """Record the current variable as a pointer.

    The entry is stored under ``self.var_name`` with the type name
    ``"pointer"`` and the byte size declared on the pointer DIE.

    Args:
        end_die: ``DW_TAG_pointer_type`` DIE of the variable.
    """
    pointer_size = end_die.attributes["DW_AT_byte_size"].value
    info = VariableInfo(
        name=self.var_name,
        byte_size=pointer_size,
        type="pointer",
        address=self.address,
    )
    self.variable_map[self.var_name] = info
[docs]
def _process_structure_type(self, end_die):
    """Process a structure type variable.

    Expands the structure into its members and records one ``VariableInfo``
    per member, keyed by the dotted member name. A member's address is the
    variable's address plus the member offset, or ``None`` when the variable
    itself has no address.

    Args:
        end_die: ``DW_TAG_structure_type`` DIE of the variable.
    """
    members = self._get_structure_members(end_die, self.var_name)
    for member_name, member_data in members.items():
        # Compare against None explicitly: address 0 is a valid address and
        # must not be treated as "no address".
        if self.address is not None:
            member_address = self.address + member_data["address_offset"]
        else:
            member_address = None
        self.variable_map[member_name] = VariableInfo(
            name=member_name,
            byte_size=member_data["byte_size"],
            type=member_data["type"],
            address=member_address,
            array_size=member_data["array_size"],
        )
[docs]
def _process_array_type(self, end_die):
    """Process an array type variable.

    Records the variable under ``self.var_name`` with the name and byte size
    of its element type (``"unknown"`` / ``0`` when the element DIE lacks
    them) and the length reported by ``_get_array_length``.

    Nothing is recorded when the array DIE has no ``DW_AT_type`` or when the
    element type cannot be resolved.

    Args:
        end_die: ``DW_TAG_array_type`` DIE of the variable.
    """
    array_size = self._get_array_length(end_die)
    base_type_attr = end_die.attributes.get("DW_AT_type")
    if not base_type_attr:
        return
    base_type_offset = base_type_attr.value + end_die.cu.cu_offset
    base_type_die = self.dwarf_info.get_DIE_from_refaddr(base_type_offset)
    if not base_type_die:
        return

    element_die = self._get_end_die(base_type_die)
    if element_die is None:
        # _get_end_die returns None for a type chain it cannot resolve;
        # skip the variable instead of failing on a None attribute access.
        logging.warning(
            f"Skipping array variable {self.var_name} due to missing end DIE"
        )
        return

    name_attr = element_die.attributes.get("DW_AT_name")
    type_name = name_attr.value.decode("utf-8") if name_attr else "unknown"
    byte_size_attr = element_die.attributes.get("DW_AT_byte_size")
    byte_size = byte_size_attr.value if byte_size_attr else 0
    self.variable_map[self.var_name] = VariableInfo(
        name=self.var_name,
        byte_size=byte_size,
        type=type_name,
        address=self.address,
        array_size=array_size,
    )
[docs]
def _process_base_type(self, end_die):
    """Record the current variable using the name and size of its type DIE.

    The type name falls back to ``"unknown"`` when the DIE has no
    ``DW_AT_name``; ``DW_AT_byte_size`` must be present.

    Args:
        end_die: Terminal type DIE of the variable.
    """
    name_attr = end_die.attributes.get("DW_AT_name")
    if name_attr:
        type_name = name_attr.value.decode("utf-8")
    else:
        type_name = "unknown"
    size = end_die.attributes["DW_AT_byte_size"].value
    self.variable_map[self.var_name] = VariableInfo(
        name=self.var_name,
        byte_size=size,
        type=type_name,
        address=self.address,
    )
[docs]
def _get_structure_members_recursive(
    self, die, parent_name: str, prev_address_offset=0
):
    """Recursively gets structure members from a DWARF DIE.

    Args:
        die: Structure DIE whose children are inspected.
        parent_name: Dotted name prefix; each member is stored as
            ``parent_name + "." + member name``.
        prev_address_offset: Offset of ``die`` relative to the outermost
            variable; added to every member offset.

    Returns:
        A tuple ``(members, prev_address_offset)``. ``members`` maps the
        dotted member name to a dict with the keys ``type``, ``byte_size``,
        ``address_offset`` and ``array_size``. Nested structures are
        flattened into the same dict. A member whose processing raises is
        logged and left out.
    """
    member_tags = {
        "DW_TAG_member",
        "DW_TAG_pointer_type",
        "DW_TAG_array_type",
    }

    def member_offset(location_attr):
        """Return the byte offset held by a DW_AT_data_member_location."""
        if not location_attr:
            return 0
        raw = location_attr.value
        if isinstance(raw, int):
            # Constant form: the attribute value is the offset itself.
            return raw
        if len(raw) > 1 and raw[0] == 0x23:
            # DW_OP_plus_uconst followed by a ULEB128 operand, which spans
            # several bytes for offsets of 128 and above.
            offset = 0
            for position, byte in enumerate(raw[1:]):
                offset |= (int(byte) & 0x7F) << (7 * position)
                if not int(byte) & 0x80:
                    break
            return offset
        # Any other expression: keep the previous single-byte reading.
        return int(raw[1])

    members = {}
    for child_die in die.iter_children():
        if child_die.tag not in member_tags:
            continue
        member_name = parent_name
        name_attr = child_die.attributes.get("DW_AT_name")
        if name_attr:
            member_name += "." + name_attr.value.decode("utf-8")
        type_attr = child_die.attributes.get("DW_AT_type")
        if not type_attr:
            continue
        type_offset = type_attr.value + child_die.cu.cu_offset
        try:
            member_type = self._get_member_type(type_offset)
            address_offset = prev_address_offset + member_offset(
                child_die.attributes.get("DW_AT_data_member_location")
            )
            nested_die = self._get_end_die(child_die)
            if nested_die.tag == "DW_TAG_structure_type":
                nested_members, _ = self._get_structure_members_recursive(
                    nested_die,
                    member_name,
                    address_offset,
                )
                if nested_members:
                    members.update(nested_members)
            elif nested_die.tag == "DW_TAG_array_type":
                array_size = self._get_array_length(nested_die)
                base_type_attr = nested_die.attributes.get("DW_AT_type")
                if base_type_attr:
                    base_type_offset = (
                        base_type_attr.value + nested_die.cu.cu_offset
                    )
                    base_type_die = self._get_end_die(
                        self.dwarf_info.get_DIE_from_refaddr(base_type_offset)
                    )
                    if base_type_die:
                        type_name = base_type_die.attributes.get("DW_AT_name")
                        byte_size_attr = base_type_die.attributes.get(
                            "DW_AT_byte_size"
                        )
                        members[member_name] = {
                            "type": (
                                type_name.value.decode("utf-8")
                                if type_name
                                else "unknown"
                            ),
                            "byte_size": (
                                byte_size_attr.value if byte_size_attr else 0
                            ),
                            "address_offset": address_offset,
                            "array_size": array_size,
                        }
            else:
                members[member_name] = {
                    "type": member_type["name"],
                    "byte_size": member_type["byte_size"],
                    "address_offset": address_offset,
                    "array_size": self._get_array_length(child_die),
                }
        except Exception as e:
            # Best effort: one unreadable member must not abort the whole
            # structure, so log it and carry on with the next child.
            logging.error("exception", exc_info=e)
            continue
    return members, prev_address_offset
[docs]
def _get_structure_members(self, structure_die, var_name):
    """Return the flattened members of a structure DIE.

    Args:
        structure_die: Structure DIE to expand.
        var_name: Name of the variable, used as prefix of the member names.

    Returns:
        The member dictionary produced by
        ``_get_structure_members_recursive`` (its offset result is dropped).
    """
    members, _ = self._get_structure_members_recursive(structure_die, var_name)
    return members
[docs]
def _get_array_length(self, type_die):
    """Return the length described by the children of an array type DIE.

    Args:
        type_die: DIE whose children are searched.

    Returns:
        ``DW_AT_upper_bound + 1`` of the first ``DW_TAG_subrange_type`` child
        that carries an upper bound, or ``0`` when there is none.
    """
    upper_bounds = (
        child.attributes.get("DW_AT_upper_bound")
        for child in type_die.iter_children()
        if child.tag == "DW_TAG_subrange_type"
    )
    for bound in upper_bounds:
        if bound:
            return bound.value + 1
    return 0
[docs]
def _get_member_type(self, type_offset):
    """Retrieve the type information from DWARF given a type offset.

    Args:
        type_offset: Offset of the type DIE, in the form accepted by
            ``dwarf_info.get_DIE_from_refaddr``.

    Returns:
        ``{"name": str, "byte_size": int | None}`` for the base type that
        the DIE finally resolves to, or ``None`` when no base type can be
        reached.
    """
    type_die = self.dwarf_info.get_DIE_from_refaddr(type_offset)
    if not type_die:
        return None
    type_die = self._get_end_die(type_die)
    if type_die is None:
        # The type chain could not be resolved.
        return None
    if type_die.tag == "DW_TAG_base_type":
        byte_size_attr = type_die.attributes.get("DW_AT_byte_size")
        return {
            "name": type_die.attributes["DW_AT_name"].value.decode("utf-8"),
            "byte_size": byte_size_attr.value if byte_size_attr else None,
        }
    base_type_attr = type_die.attributes.get("DW_AT_type")
    if not base_type_attr:
        return None
    # Add the CU offset before the lookup, as every other reference
    # resolution in this parser does; the raw attribute value alone only
    # matches when the CU offset is 0.
    return self._get_member_type(base_type_attr.value + type_die.cu.cu_offset)
[docs]
def _get_dwarf_die_by_offset(self, offset):
    """Find a DIE by scanning every compilation unit for a matching offset.

    Args:
        offset: Value compared against each DIE's ``offset`` attribute.

    Returns:
        The first DIE whose ``offset`` equals ``offset``, or ``None``.
    """
    all_dies = (
        die
        for compilation_unit in self.dwarf_info.iter_CUs()
        for die in compilation_unit.iter_DIEs()
    )
    return next((die for die in all_dies if die.offset == offset), None)
if __name__ == "__main__":
    # Manual smoke test: parse one ELF file and print what was found.
    import sys

    # logging.basicConfig(level=logging.DEBUG)
    # Pass the ELF path as the first command-line argument; without one the
    # developer's local sample below is used.
    default_elf_file = r"C:\_DESKTOP\_Projects\Motorbench_Projects\motorbench_FOC_PLL_PIC33CK256mp508_MCLV2\ZSMT_dsPIC33CK_MCLV_48_300.X\dist\default\production\ZSMT_dsPIC33CK_MCLV_48_300.X.production.elf"  # 16bit-ELF
    elf_file = sys.argv[1] if len(sys.argv) > 1 else default_elf_file
    elf_reader = Elf32Parser(elf_file)
    try:
        variable_map = elf_reader._map_variables()
        print(variable_map)
        print("'''''''''''''''''''''''''''''''''''''''' ")
        # Only array variables are listed individually.
        for var_name, var_info in variable_map.items():
            if var_info.array_size != 0:
                print(f"Variable Name: {var_name}, Info: {var_info}")
    finally:
        # Release the file handle even when parsing fails.
        elf_reader.close_elf_file()