공부/Python
[C 함수 파싱] clang library
래울
2025. 2. 10. 00:38
소스 코드를 바이너리 코드로 컴파일할 때 사용하는 LLVM의 libclang을 기반으로 C 함수를 파싱한다.
LLVM-<version>-win64.exe 다운로드
https://github.com/llvm/llvm-project/releases
Releases · llvm/llvm-project
The LLVM Project is a collection of modular and reusable compiler and toolchain technologies. - llvm/llvm-project
github.com
libclang docs
https://libclang.readthedocs.io/en/latest/_modules/clang/cindex.html
example code
import clang.cindex
import re
import os
# On Windows, set the path to libclang.dll (adjust it to match your install location).
clang.cindex.Config.set_library_file("C:/Program Files/LLVM/bin/libclang.dll")
# Index object used by parse_functions_info() to parse each C file.
clang_index = clang.cindex.Index.create()
print("libclang.dll load success.")
# Extracted function source text, keyed by "<file basename>_<function name>"
# (filled in by parse_functions_code()).
project_functions = dict()
# Matches C comments *and* string/character literals in one pass, so that
# comment markers appearing inside a literal (e.g. "http://...") or inside
# another comment are never treated as the start of a comment.
_C_COMMENT_OR_LITERAL = re.compile(
    r'//[^\n]*'                 # line comment (up to, not including, newline)
    r'|/\*.*?\*/'               # block comment (may span lines)
    r'|"(?:\\.|[^"\\\n])*"'     # string literal
    r"|'(?:\\.|[^'\\\n])*'",    # character literal
    re.DOTALL,
)


def _strip_c_comments(c_code):
    """Return *c_code* with comments removed and literals left untouched."""
    def _replace(match):
        token = match.group(0)
        # Only comments start with '/'; literals are emitted unchanged.
        return "" if token.startswith("/") else token

    return _C_COMMENT_OR_LITERAL.sub(_replace, c_code)


def preprocess_file(input_file, output_file):
    """Write a copy of a C file with comments and ``#include`` lines removed.

    Args:
        input_file: Path of the C source file to read (UTF-8).
        output_file: Path the preprocessed text is written to (UTF-8).

    Returns:
        None. If the input cannot be read, the error is printed and no
        output file is written. A failure while writing is printed as well.
    """
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            c_code = f.read()
    except (OSError, UnicodeError) as e:
        print(f"{e}")
        # Nothing was read, so there is nothing to preprocess or write.
        return

    # Remove comments.
    c_code = _strip_c_comments(c_code)

    # Split into lines and drop every line that starts with #include.
    preprocessed_code = "\n".join(
        line for line in c_code.split("\n")
        if not line.strip().startswith("#include")
    )

    # Save the result to the new file.
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(preprocessed_code)
    except (OSError, UnicodeError) as e:
        print(f"{e}")
def parse_functions_info(file_path):
    """List the function declarations found at the top level of a C file.

    The file is parsed with the module-level ``clang_index``; only direct
    children of the translation unit's root cursor are inspected.

    Args:
        file_path: Path of the C file to parse.

    Returns:
        A list of ``(name, start_line, end_line)`` tuples, one per
        ``FUNCTION_DECL`` cursor, using the line numbers of the cursor extent.
    """
    translation_unit = clang_index.parse(file_path)
    function_kind = clang.cindex.CursorKind.FUNCTION_DECL
    return [
        (cursor.spelling, cursor.extent.start.line, cursor.extent.end.line)
        for cursor in translation_unit.cursor.get_children()
        if cursor.kind == function_kind
    ]
def parse_functions_code(file_path, functions):
    """Store the source text of each listed function in ``project_functions``.

    Args:
        file_path: Path of the C file to read (UTF-8).
        functions: Iterable of ``(name, start_line, end_line)`` tuples with
            1-based, inclusive line numbers (the shape returned by
            ``parse_functions_info``).

    Returns:
        None. Each function's text is stored under the key
        ``"<file basename>_<name>"``. If the file cannot be read, the error
        is printed and nothing is stored.
    """
    print(f"extract_code: {file_path}")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            source_lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"{e}")
        # No lines were read; return instead of touching an unbound variable.
        return

    file_name = os.path.basename(file_path)
    for func_name, start, end in functions:
        # Line numbers are 1-based and inclusive, hence start - 1.
        func_code = "".join(source_lines[start - 1:end])
        project_functions[f"{file_name}_{func_name}"] = func_code
if __name__ == "__main__":
    # Example run for project A.
    file_path_list = [
        "sources/test.c",
        "sources/test2.c",
    ]
    project_name = 'A'
    project_dir_path = f"result/{project_name}"
    os.makedirs(project_dir_path, exist_ok=True)

    # Strip comments and #include lines, saving each result as
    # <project dir>/<file.c>.
    for source_path in file_path_list:
        cleaned_path = f"{project_dir_path}/{os.path.basename(source_path)}"
        preprocess_file(source_path, cleaned_path)

    # Walk the project directory and collect the functions of every file in it.
    for dir_path, _, file_names in os.walk(project_dir_path):
        for file_name in file_names:
            curr_file_path = f"{dir_path}/{file_name}"
            function_spans = parse_functions_info(curr_file_path)
            parse_functions_code(curr_file_path, function_spans)

    print(project_functions)