import os
import re
from datetime import datetime

import pandas as pd


def analyze_info_tool(info_index=1, pattern=r'result_\d+\.\d+_\d+_*', parent_folder='./'):
    """Analyze every result folder under ``parent_folder`` whose name matches ``pattern``.

    Parameters:
        info_index (int): which record of each information* file to analyze (1-based).
        pattern (str): regex a folder name must match (e.g. result_0.04_9_r).
        parent_folder (str): directory whose immediate children are scanned.
    """
    for folder in os.listdir(parent_folder):
        # Match the folder name against the regex.
        if re.match(pattern, folder):
            folder_path = os.path.join(parent_folder, folder)
            # Walk the files below result_0.04_9_r/ etc.
            analyze_one_folder(info_index, folder_path)


def analyze_multiple_folders(folder_paths, info_index=1, restore=True):
    """Analyze the data files in several folders and aggregate the metric results.

    Parameters:
        folder_paths (list): folder paths to analyze.
        info_index (int): which record of each information* file to analyze (1-based).
        restore (bool): save the results to a CSV file when True; print them when False.

    Returns:
        pandas.DataFrame: one row per dataset with Dataset/Accuracy/ANI/MSE/Queries columns.
    """
    all_results = []
    for folder_path in folder_paths:
        # NOTE: restore=False makes analyze_one_folder print each folder's results
        # as a progress log while we collect them here.
        results = analyze_one_folder(info_index, folder_path, restore=False)
        all_results.extend(results)

    df = pd.DataFrame(all_results, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])

    if restore:
        # Timestamped filename so repeated runs do not overwrite each other.
        now = datetime.now()
        results_filename = f"multiple_folders_analysis_{now.year}-{now.month:02d}-{now.day:02d}-{now.hour:02d}-{now.minute:02d}-{now.second:02d}.csv"
        df.to_csv(results_filename, index=False)
        print(f"Results saved to {results_filename}")
    else:
        for result in all_results:
            dataset_name, acc, ani, mse, queries = result
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')
    return df


def analyze_one_folder(info_index=1, folder_path='./', restore=False):
    """Walk ``folder_path``, parse every information* file, and compute weighted metrics.

    Metrics (ANI, MSE, mean queries) are averaged over all files in a sub-folder,
    weighted by each file's successful-sample count; accuracy is total successful
    samples divided by total correctly-classified samples.

    Parameters:
        info_index (int): which record of each information* file to analyze (1-based).
        folder_path (str): root folder to walk.
        restore (bool): save the results to a CSV file when True; print them when False.

    Returns:
        list: tuples (dataset_name, acc, ani, mse, queries), one per sub-folder
        that contributed at least one successful sample.
    """
    results = []

    # Regexes for the metric lines recorded in the information files.
    pattern_successful = r'Successful samples:\s*(\d+)'
    pattern_ani = r'ANI:(\d+\.\d+)'
    pattern_mse = r'MSE:(\d+\.\d+)'
    pattern_queries = r'Mean queries:(\d+\.\d+)'
    pattern_correctly_classified_samples = r'Correctly-classified samples:\s*(\d+)'

    for root, dirs, files in os.walk(folder_path):
        # Skip all figures folders (plots only, no metric files).
        # BUGFIX: the previous pattern r'\*/figures\d+' required a literal '*'
        # character and therefore never skipped anything.
        if re.match(r'figures\d*', os.path.basename(root)):
            continue

        successful_samples_list = []            # successful-sample counts
        ani_list = []                           # ANI values
        mse_list = []                           # MSE values
        queries_list = []                       # mean-queries values
        correctly_classified_samples_list = []  # correctly-classified sample counts

        for file in files:
            # Only information* files in this sub-folder carry the metrics.
            if not re.match(r'information\d', file):
                continue
            # errors='replace' keeps parsing alive on malformed bytes.
            with open(os.path.join(root, file), 'r', errors='replace') as f:
                s = f.read()

            successful_match = re.finditer(pattern_successful, s)
            ani_match = re.finditer(pattern_ani, s)
            mse_match = re.finditer(pattern_mse, s)
            queries_match = re.finditer(pattern_queries, s)
            correctly_classified_samples_match = re.finditer(pattern_correctly_classified_samples, s)

            try:
                successful_samples = None
                ani = None
                mse = None
                queries = None
                correctly_classified_samples = None
                # Advance every iterator to the info_index-th record.
                for _ in range(info_index):
                    successful_samples = next(successful_match).group(1)
                    ani = next(ani_match).group(1)
                    mse = next(mse_match).group(1)
                    queries = next(queries_match).group(1)
                    correctly_classified_samples = next(correctly_classified_samples_match).group(1)
                successful_samples_list.append(successful_samples)
                ani_list.append(ani)
                mse_list.append(mse)
                queries_list.append(queries)
                correctly_classified_samples_list.append(correctly_classified_samples)
            except StopIteration:
                # Fewer than info_index records in this file: skip it entirely
                # (nothing is appended, so the lists stay aligned).
                pass

        ani_value = 0                   # weighted ANI sum
        mse_value = 0                   # weighted MSE sum
        queries_value = 0               # weighted queries sum
        successful_samples_counts = 0   # total successful samples
        correct_samples_counts = 0      # total correctly-classified samples
        for i in range(len(successful_samples_list)):
            # Skip entries where a metric is missing (e.g. every attack failed).
            if successful_samples_list[i] is None or ani_list[i] is None or mse_list[i] is None:
                continue
            weight = float(successful_samples_list[i])
            mse_value += weight * float(mse_list[i])
            ani_value += weight * float(ani_list[i])
            queries_value += weight * float(queries_list[i])
            successful_samples_counts += weight
            correct_samples_counts += float(correctly_classified_samples_list[i])

        if successful_samples_counts > 0:
            # The dataset name is the last component of the folder path.
            dataset_name = os.path.basename(root)
            acc = successful_samples_counts / correct_samples_counts
            ani = ani_value / successful_samples_counts
            mse = mse_value / successful_samples_counts
            queries = queries_value / successful_samples_counts
            results.append((dataset_name, acc, ani, mse, queries))

    if restore:
        df = pd.DataFrame(results, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])
        # Timestamped filename so repeated runs do not overwrite each other.
        now = datetime.now()
        results_filename = f"analyze_result_{now.year}-{now.month:02d}-{now.day:02d}-{now.hour:02d}-{now.minute:02d}-{now.second:02d}.csv"
        df.to_csv(results_filename, index=False)
    else:
        for dataset_name, acc, ani, mse, queries in results:
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')
    return results


def count_classes_samples(folder_path, select_colum='Success'):
    """Count (column-4, column-5) pairs across all information* files under ``folder_path``.

    A row is counted only when its 9th column equals ``select_colum``.

    Parameters:
        select_colum (str): value the 9th column must equal, default 'Success'.
        folder_path (str): root folder to walk.

    Returns:
        list: rows of [key1, key2, count].
    """
    key_pair_counts = {}
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            # Only information* files carry the per-sample records.
            if re.match(r'information\d', file):
                # errors='replace' for consistency with analyze_one_folder.
                with open(os.path.join(root, file), 'r', errors='replace') as f:
                    data = f.read()
                # Split into whitespace-separated records with at least 9 columns.
                lines = data.strip().split('\n')
                records = [line.split() for line in lines if len(line.split()) > 8]
                # Tally the (col 4, col 5) pair when col 9 matches select_colum.
                for record in records:
                    if record[8] == select_colum:
                        key_pair = (int(record[3]), int(record[4]))
                        key_pair_counts[key_pair] = key_pair_counts.get(key_pair, 0) + 1
    # Flatten the counter into [key1, key2, count] rows.
    return [[key_pair[0], key_pair[1], count] for key_pair, count in key_pair_counts.items()]


def generate_heat_maps_array(data, default_value=0, classes_size=None):
    """Build a 2-D array from (row, col, value) triples.

    For a triple such as [6, 1, 1] the result satisfies array[6][1] == 1.

    Parameters:
        classes_size (int | None): when given, the array is classes_size x classes_size;
            otherwise it is sized from the maxima found in ``data``.
        data: list of triples (row index, column index, value).
        default_value: fill value for unset cells, default 0.

    Returns:
        list[list]: the populated 2-D array.
    """
    # Determine the array dimensions.
    if classes_size is not None:
        max_row = max_col = classes_size
    else:
        max_row = max(item[0] for item in data) + 1
        max_col = max(item[1] for item in data) + 1

    # Initialize the 2-D array with the default value.
    array = [[default_value for _ in range(max_col)] for _ in range(max_row)]

    for row, col, value in data:
        # BUGFIX: was array[col][row], which transposed the documented contract
        # and raised IndexError whenever max_col > max_row.
        array[row][col] = value
    return array


if __name__ == "__main__":
    # Common root of all experiment-result folders.
    base_dir = "/Users/catb/Library/CloudStorage/CloudMounter-B40-4/home/BJTU/project/CPadv/CCPP实验结果/model_f"
    dataset_names = [
        "Car", "Wafer", "ItalyPowerDemand", "AllGestureWiimoteY", "NonIFECGTho2",
        "Trace", "PigAirwayPressure", "GesturePebbleZ2", "AllGestureWiimoteX",
        "FordA", "DodgerLoopDay", "SonyAIBORobotSurface1", "PigArtPressure",
        "MoteStrain", "ECGFiveDays", "PickupGeWZ", "FreezerSmallTrain",
        "TwoLeadECG", "Lightning7", "Phoneme", "DodgerLoopWeekend", "ECG5000",
        "ShakeGestureWiimoteZ", "GesturePebbleZ1", "NonIFECGTho1",
        "AllGestureWiimoteZ", "FordB", "InsectWingbeatSound",
        "EOGHorizontalSignal", "ChlorineConcentration", "Plane",
        "EOGVerticalSignal", "DodgerLoopGame", "ECG200", "Fungi", "CinCEC",
        "SonyAIBORobotSurface2", "Lightning2", "FreezerRegularTrain",
        "StarLightCurves", "Earthquakes", "PigCVP",
    ]
    experiment_folders = [f"{base_dir}/{name}" for name in dataset_names]

    # Analyze all folders and save the combined results to a single CSV file.
    results_df = analyze_multiple_folders(experiment_folders)
    # To inspect the results without writing a CSV, use:
    # results_df = analyze_multiple_folders(experiment_folders, restore=False)