CCPP/analyzeTool.py
2025-04-20 20:55:06 +08:00

294 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
from datetime import datetime
import pandas as pd
def analyze_info_tool(info_index=1, pattern=r'result_\d+\.\d+_\d+_*', parent_folder='./'):
    """Scan *parent_folder* for result folders matching *pattern* and analyze each.

    Args:
        info_index (int): Which record of the information file to analyze
            (1-based, i.e. the i-th record in each information* file).
        pattern (str): Regex a folder name must match to be analyzed.
        parent_folder (str): Directory whose immediate children are scanned.
    """
    # Keep only children whose name matches the result-folder pattern.
    matching_names = (name for name in os.listdir(parent_folder)
                      if re.match(pattern, name))
    for name in matching_names:
        # Walk every file under e.g. result_0.04_9_r/ and collect its metrics.
        analyze_one_folder(info_index, os.path.join(parent_folder, name))
def analyze_multiple_folders(folder_paths, info_index=1, restore=True):
    """Analyze several result folders and aggregate their metric records.

    Args:
        folder_paths (list): Folder paths to analyze.
        info_index (int): Which record of the information file to analyze
            (1-based, i.e. the i-th record in each information* file).
        restore (bool): Save the results to a CSV file when True (default);
            print them to stdout when False.

    Returns:
        pandas.DataFrame: One row per dataset with columns
        Dataset / Accuracy / ANI / MSE / Queries.
    """
    collected = []
    for path in folder_paths:
        # Delegate the per-folder work; suppress its own CSV output.
        collected.extend(analyze_one_folder(info_index, path, restore=False))
    df = pd.DataFrame(collected, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])
    if restore:
        # Timestamped file name so repeated runs never overwrite each other.
        now = datetime.now()
        results_filename = (
            f"multiple_folders_analysis_{now.year}-{now.month:02d}-{now.day:02d}"
            f"-{now.hour:02d}-{now.minute:02d}-{now.second:02d}.csv"
        )
        df.to_csv(results_filename, index=False)
        print(f"Results saved to {results_filename}")
    else:
        for dataset_name, acc, ani, mse, queries in collected:
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')
    return df
def analyze_one_folder(info_index=1, folder_path='./', restore=False):
    """Analyze the data files in one folder tree and compute weighted metrics.

    Every ``information*`` file found while walking *folder_path* is parsed
    for its *info_index*-th record (successful samples, ANI, MSE, mean
    queries and correctly-classified samples).  Per leaf folder, the metrics
    are combined into averages weighted by the successful-sample count.

    Args:
        info_index (int): Which record of the information file to analyze
            (1-based, i.e. the i-th occurrence of each metric pattern).
        folder_path (str): Root folder to walk, defaults to './'.
        restore (bool): Save results to a timestamped CSV file when True;
            print them when False (default).

    Returns:
        list: Tuples ``(dataset_name, acc, ani, mse, queries)``, one per
        folder that contained at least one successful sample.
    """
    results = []
    for root, dirs, files in os.walk(folder_path):
        # Skip figure folders (e.g. .../figures1).
        # BUG FIX: the original pattern r'\*/figures\d+' matched a literal
        # '*' character, so the skip never fired; '.*/' matches any prefix.
        if re.match(r'.*/figures\d+', root):
            continue
        # Per-file metric values collected from this folder.
        successful_samples_list = []            # successful sample counts
        ani_list = []                           # ANI values
        mse_list = []                           # MSE values
        queries_list = []                       # mean-queries values
        correctly_classified_samples_list = []  # correctly-classified counts
        for file in files:
            # Only information* files hold the metrics we need.
            if not re.match(r'information\d', file):
                continue
            with open(os.path.join(root, file), 'r', errors='replace') as f:
                s = f.read()
            # One iterator per metric; all are advanced in lockstep below so
            # the i-th values of each list describe the same record.
            successful_match = re.finditer(r'Successful samples:\s*(\d+)', s)
            ani_match = re.finditer(r'ANI:(\d+\.\d+)', s)
            mse_match = re.finditer(r'MSE:(\d+\.\d+)', s)
            queries_match = re.finditer(r'Mean queries(\d+\.\d+)', s)
            correctly_classified_samples_match = re.finditer(
                r'Correctly-classified samples:\s*(\d+)', s)
            try:
                successful_samples = ani = mse = queries = None
                correctly_classified_samples = None
                for _ in range(info_index):  # advance to the wanted record
                    successful_samples = next(successful_match).group(1)
                    ani = next(ani_match).group(1)
                    mse = next(mse_match).group(1)
                    queries = next(queries_match).group(1)
                    correctly_classified_samples = next(
                        correctly_classified_samples_match).group(1)
                successful_samples_list.append(successful_samples)
                ani_list.append(ani)
                mse_list.append(mse)
                queries_list.append(queries)
                correctly_classified_samples_list.append(correctly_classified_samples)
            except StopIteration:
                # Fewer than info_index records in this file: skip it, keeping
                # all five lists aligned (nothing was appended).
                pass
        # Success-weighted accumulators for this folder.
        ani_value = 0.0              # sum of weight * ANI
        mse_value = 0.0              # sum of weight * MSE
        queries_value = 0.0          # sum of weight * queries
        successful_samples_counts = 0.0  # total successful samples
        correct_samples_counts = 0.0     # total correctly-classified samples
        for i in range(len(successful_samples_list)):
            # Skip entries where nothing was matched (all-failure results).
            if (successful_samples_list[i] is None or ani_list[i] is None
                    or mse_list[i] is None):
                continue
            weight = float(successful_samples_list[i])
            mse_value += weight * float(mse_list[i])
            ani_value += weight * float(ani_list[i])
            queries_value += weight * float(queries_list[i])
            successful_samples_counts += weight
            correct_samples_counts += float(correctly_classified_samples_list[i])
        if successful_samples_counts > 0:
            # Dataset name is the last path component of the folder.
            dataset_name = os.path.basename(root)
            acc = successful_samples_counts / correct_samples_counts
            ani = ani_value / successful_samples_counts
            mse = mse_value / successful_samples_counts
            queries = queries_value / successful_samples_counts
            results.append((dataset_name, acc, ani, mse, queries))
    if restore:
        df = pd.DataFrame(results, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])
        # Timestamped file name so repeated runs never overwrite each other.
        now = datetime.now()
        results_filename = (
            f"analyze_result_{now.year}-{now.month:02d}-{now.day:02d}"
            f"-{now.hour:02d}-{now.minute:02d}-{now.second:02d}.csv"
        )
        df.to_csv(results_filename, index=False)
    else:
        for dataset_name, acc, ani, mse, queries in results:
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')
    return results
def count_classes_samples(folder_path, select_colum='Success'):
    """Count (column-4, column-5) pairs in all information* files under a folder.

    A row is only counted when its 9th column equals *select_colum*.

    :param select_colum: Value the 9th column must equal (default 'Success').
    :param folder_path: Root folder to analyze.
    :return: 2-D list of [key1, key2, count] entries.
    """
    pair_counter = {}
    for root, dirs, files in os.walk(folder_path):
        # Only information* files hold the rows we need.
        info_files = [name for name in files if re.match(r'information\d', name)]
        for name in info_files:
            with open(os.path.join(root, name), 'r') as f:
                content = f.read()
            for line in content.strip().split('\n'):
                fields = line.split()
                # Require at least 9 columns and the selected status value.
                if len(fields) > 8 and fields[8] == select_colum:
                    pair = (int(fields[3]), int(fields[4]))
                    pair_counter[pair] = pair_counter.get(pair, 0) + 1
    # Flatten {(k1, k2): count} into [[k1, k2, count], ...].
    return [[k1, k2, cnt] for (k1, k2), cnt in pair_counter.items()]
def generate_heat_maps_array(data, default_value=0, classes_size=None):
    """Build a 2-D array from (row, col, value) triples.

    For a triple such as ``[6, 1, 1]`` the resulting array gets
    ``array[6][1] = 1``.

    :param classes_size: Force a square classes_size x classes_size array;
        when None the size is derived from the data.
    :param data: Iterable of triples (row index, column index, value).
    :param default_value: Fill value for untouched cells (default 0).
    :return: The generated 2-D list.
    """
    # Determine the array dimensions.
    if classes_size is not None:
        max_row = max_col = classes_size
    else:
        max_row = max(item[0] for item in data) + 1
        max_col = max(item[1] for item in data) + 1
    # Initialize the 2-D array with the default value.
    array = [[default_value for _ in range(max_col)] for _ in range(max_row)]
    # Set the specified cells.
    for row, col, value in data:
        # BUG FIX: the original wrote array[col][row], transposing the data
        # relative to the docstring and the size computation above, and
        # raising IndexError whenever max_row != max_col.
        array[row][col] = value
    return array
if __name__ == "__main__":
    # Root directory that holds one result folder per dataset.
    base_dir = ("/Users/catb/Library/CloudStorage/CloudMounter-B40-4"
                "/home/BJTU/project/CPadv/CCPP实验结果/model_f")
    dataset_names = [
        "Car", "Wafer", "ItalyPowerDemand", "AllGestureWiimoteY",
        "NonIFECGTho2", "Trace", "PigAirwayPressure", "GesturePebbleZ2",
        "AllGestureWiimoteX", "FordA", "DodgerLoopDay", "SonyAIBORobotSurface1",
        "PigArtPressure", "MoteStrain", "ECGFiveDays", "PickupGeWZ",
        "FreezerSmallTrain", "TwoLeadECG", "Lightning7", "Phoneme",
        "DodgerLoopWeekend", "ECG5000", "ShakeGestureWiimoteZ", "GesturePebbleZ1",
        "NonIFECGTho1", "AllGestureWiimoteZ", "FordB", "InsectWingbeatSound",
        "EOGHorizontalSignal", "ChlorineConcentration", "Plane",
        "EOGVerticalSignal", "DodgerLoopGame", "ECG200", "Fungi", "CinCEC",
        "SonyAIBORobotSurface2", "Lightning2", "FreezerRegularTrain",
        "StarLightCurves", "Earthquakes", "PigCVP",
    ]
    # Build the list of experiment result folder paths to analyze.
    experiment_folders = [f"{base_dir}/{name}" for name in dataset_names]
    # Analyze every folder and save the combined results to a single CSV file.
    results_df = analyze_multiple_folders(experiment_folders)
    # To inspect the results without writing a CSV, use:
    # results_df = analyze_multiple_folders(experiment_folders, restore=False)