ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • [C 함수 파싱] clang library
    공부/Python 2025. 2. 10. 00:38

    소스 코드를 바이너리 코드로 컴파일할 때 사용하는 LLVM 기반의 libclang을 이용해 C 함수를 파싱한다.

     

    LLVM-<version>-win64.exe 다운로드

    https://github.com/llvm/llvm-project/releases

     

    Releases · llvm/llvm-project

    The LLVM Project is a collection of modular and reusable compiler and toolchain technologies. - llvm/llvm-project

    github.com

     

    libclang docs

    https://libclang.readthedocs.io/en/latest/_modules/clang/cindex.html

     

    example code

    dir structure

    import clang.cindex
    import re
    import os
    
    # Point the Python bindings at libclang.dll on Windows (adjust to the local
    # install path). This must run before the Index below is created.
    clang.cindex.Config.set_library_file("C:/Program Files/LLVM/bin/libclang.dll")
    # Single shared index used by parse_functions_info() to parse each file.
    clang_index = clang.cindex.Index.create()
    print("libclang.dll load success.")
    
    # Accumulates extracted source text, keyed by "<file basename>_<function name>".
    project_functions = dict()
    
    
    # Matches C comments and string/char literals in a single left-to-right pass,
    # so comment markers that appear inside a literal (e.g. "http://x") or inside
    # another comment are not misinterpreted.
    _C_COMMENT_OR_LITERAL = re.compile(
        r'//[^\n]*'                 # line comment
        r'|/\*.*?\*/'               # block comment (may span lines)
        r'|"(?:\\.|[^"\\\n])*"'     # string literal
        r"|'(?:\\.|[^'\\\n])*'",    # character literal
        re.DOTALL,
    )
    # An #include directive, allowing whitespace before and after the '#'.
    _INCLUDE_LINE = re.compile(r'\s*#\s*include')


    def _strip_c_comments(c_code):
        """Return c_code with // and /* */ comments removed and literals kept."""
        def _keep_literal(match):
            token = match.group(0)
            # Literals start with a quote; everything else matched is a comment.
            return token if token[0] in "\"'" else ""

        return _C_COMMENT_OR_LITERAL.sub(_keep_literal, c_code)


    def preprocess_file(input_file, output_file):
        """Write a copy of a C source file with comments and #include lines removed.

        Args:
            input_file: Path of the C source file to read (UTF-8).
            output_file: Path the stripped source is written to (UTF-8).

        Returns:
            None. If the input cannot be read, the error is printed and nothing
            is written. If the output cannot be written, the error is printed.
        """
        try:
            with open(input_file, "r", encoding="utf-8") as f:
                c_code = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Without the source text there is nothing to write; stop here
            # instead of falling through to an unbound variable.
            print(f"{e}")
            return

        # Remove comments.
        c_code = _strip_c_comments(c_code)

        # Split into lines and drop every line that is an #include directive.
        preprocessed_code = "\n".join(
            line for line in c_code.split("\n") if not _INCLUDE_LINE.match(line)
        )

        # Save the result to the new file.
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(preprocessed_code)
        except OSError as e:
            print(f"{e}")
    
    
    def parse_functions_info(file_path):
        """Collect the top-level function declarations found in a source file.

        The file is parsed with the module-level ``clang_index``; only direct
        children of the translation unit's cursor are inspected.

        Args:
            file_path: Path of the source file to parse.

        Returns:
            A list of ``(name, start_line, end_line)`` tuples, one per
            FUNCTION_DECL cursor, using the cursor's extent line numbers.
        """
        translation_unit = clang_index.parse(file_path)

        return [
            (cursor.spelling, cursor.extent.start.line, cursor.extent.end.line)
            for cursor in translation_unit.cursor.get_children()
            if cursor.kind == clang.cindex.CursorKind.FUNCTION_DECL
        ]
    
    
    def parse_functions_code(file_path, functions):
        """Store the source text of each listed function in ``project_functions``.

        Args:
            file_path: Path of the source file the line ranges refer to (UTF-8).
            functions: Iterable of ``(name, start_line, end_line)`` tuples with
                1-based, inclusive line numbers.

        Returns:
            None. Each function's text is stored under the key
            ``"<file basename>_<function name>"``. If the file cannot be read,
            the error is printed and nothing is stored.
        """
        print(f"extract_code: {file_path}")
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                c_code = f.readlines()
        except (OSError, UnicodeDecodeError) as e:
            # No lines to slice; return rather than hit an unbound variable below.
            print(f"{e}")
            return

        file_name = os.path.basename(file_path)
        for func_name, start, end in functions:
            # Line numbers are 1-based and inclusive, hence start - 1.
            func_code = "".join(c_code[start - 1:end])
            project_functions[file_name + '_' + func_name] = func_code
    
    
    if __name__ == "__main__":
        # Example run for project "A".
        project_name = 'A'
        source_files = ["sources/test.c", "sources/test2.c"]

        project_dir_path = f"result/{project_name}"
        os.makedirs(project_dir_path, exist_ok=True)

        # Step 1: write a comment-free, include-free copy of each source file
        # to <project dir>/<file name>.
        for source_file in source_files:
            stripped_path = f"{project_dir_path}/{os.path.basename(source_file)}"
            preprocess_file(source_file, stripped_path)

        # Step 2: parse every file under the project directory and collect the
        # text of each function it declares.
        for root, _, file_names in os.walk(project_dir_path):
            for file_name in file_names:
                curr_file_path = f"{root}/{file_name}"
                parse_functions_code(curr_file_path, parse_functions_info(curr_file_path))

        print(project_functions)

     

    '공부 > Python' 카테고리의 다른 글

    [PJT] Robot  (0) 2024.05.30
    [PJT] ImageLabelingTool  (0) 2024.05.30
    [python-pptx] 파이썬 업무 자동화  (1) 2023.09.19
    [openpyxl] 액셀 파일 조작  (0) 2022.08.07
    openpyxl, 파이썬에서 .xlsx 다루기  (0) 2022.07.27
Designed by Tistory.