# CCPP/analyzeTool.py
# (Header preserved from the repository web view this file was copied from:
#  294 lines / 16 KiB / Python / Raw Normal View History,
#  snapshot dated 2025-04-20 20:55:06 +08:00)
import os
import re
from collections import Counter
from datetime import datetime

import pandas as pd
def analyze_info_tool(info_index=1, pattern=r'result_\d+\.\d+_\d+_*', parent_folder='./'):
    """Scan parent_folder for result directories whose names match pattern
    (e.g. result_0.04_9_r) and run analyze_one_folder on each of them.

    Args:
        info_index (int): which record inside each information* file to use (1-based).
        pattern (str): regex matched against directory names under parent_folder.
        parent_folder (str): directory whose entries are scanned.
    """
    matching = (entry for entry in os.listdir(parent_folder) if re.match(pattern, entry))
    for entry in matching:
        analyze_one_folder(info_index, os.path.join(parent_folder, entry))
def analyze_multiple_folders(folder_paths, info_index=1, restore=True):
    """
    Analyze the data files in several folders and aggregate the metric results.

    Args:
        folder_paths (list): folder paths to analyze.
        info_index (int): which record inside each information* file to use
            (1-based, the i-th record in the file).
        restore (bool): if True (default), save the results to a timestamped
            CSV file; if False, print them instead.

    Returns:
        pandas.DataFrame: all results, columns Dataset/Accuracy/ANI/MSE/Queries.
    """
    # Collect per-dataset result tuples from every folder.
    all_results = []
    for folder_path in folder_paths:
        results = analyze_one_folder(info_index, folder_path, restore=False)
        all_results.extend(results)
    df = pd.DataFrame(all_results, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])
    if restore:
        # Timestamped filename so repeated runs do not overwrite each other.
        stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        results_filename = f"multiple_folders_analysis_{stamp}.csv"
        df.to_csv(results_filename, index=False)
        print(f"Results saved to {results_filename}")
    else:
        for dataset_name, acc, ani, mse, queries in all_results:
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')
    return df
def analyze_one_folder(info_index=1, folder_path='./', restore=False):
    """
    Walk one folder tree, parse every information* file, and compute
    sample-weighted metric averages per sub-folder.

    Args:
        info_index (int): which record inside each information* file to use
            (1-based, i.e. the i-th occurrence of each metric pattern).
        folder_path (str): root folder to analyze (default './').
        restore (bool): if True save the results to a timestamped CSV file,
            otherwise (default) print them.

    Returns:
        list: tuples (dataset_name, acc, ani, mse, queries), one per
        sub-folder that contained at least one successful sample.
    """
    results = []
    for root, dirs, files in os.walk(folder_path):
        # Skip all figures* folders.  BUGFIX: the original pattern
        # r'\*/figures\d+' matched a literal '*' and therefore never skipped
        # anything; '.*/figures\d+' is what was intended.
        if re.match(r'.*/figures\d+', root):
            continue
        # Values collected from every information* file in this directory.
        successful_samples_list = []            # successful sample counts
        ani_list = []                           # ANI values
        mse_list = []                           # MSE values
        queries_list = []                       # mean query counts
        correctly_classified_samples_list = []  # correctly-classified totals
        for file in files:
            # Only information* files carry the metrics we need.
            if re.match(r'information\d', file):
                with open(os.path.join(root, file), 'r', errors='replace') as f:
                    s = f.read()
                # Patterns for Successful samples / ANI / MSE / queries / totals.
                pattern_successful = r'Successful samples:\s*(\d+)'
                pattern_ani = r'ANI:(\d+\.\d+)'
                pattern_mse = r'MSE:(\d+\.\d+)'
                pattern_queries = r'Mean queries(\d+\.\d+)'
                pattern_correctly_classified_samples = r'Correctly-classified samples:\s*(\d+)'
                successful_match = re.finditer(pattern_successful, s)
                ani_match = re.finditer(pattern_ani, s)
                mse_match = re.finditer(pattern_mse, s)
                queries_match = re.finditer(pattern_queries, s)
                correctly_classified_samples_match = re.finditer(pattern_correctly_classified_samples, s)
                try:
                    successful_samples = None
                    ani = None
                    mse = None
                    queries = None
                    correctly_classified_samples = None
                    # Advance every iterator to the info_index-th occurrence.
                    for _ in range(info_index):
                        successful_samples = next(successful_match).group(1)
                        ani = next(ani_match).group(1)
                        mse = next(mse_match).group(1)
                        queries = next(queries_match).group(1)
                        correctly_classified_samples = next(correctly_classified_samples_match).group(1)
                    successful_samples_list.append(successful_samples)
                    ani_list.append(ani)
                    mse_list.append(mse)
                    queries_list.append(queries)
                    correctly_classified_samples_list.append(correctly_classified_samples)
                except StopIteration:
                    # Fewer than info_index records in this file: skip it.
                    pass
        # Sample-weighted accumulation over all files of this directory.
        ani_value = 0          # weighted ANI total
        mse_value = 0          # weighted MSE total
        queries_value = 0      # weighted queries total
        successful_samples_counts = 0  # successful samples
        correct_samples_counts = 0     # correctly-classified samples
        for i in range(len(successful_samples_list)):
            # Skip records where every metric failed to parse.
            if successful_samples_list[i] is None or ani_list[i] is None or mse_list[i] is None:
                continue
            mse_value += float(successful_samples_list[i]) * float(mse_list[i])
            ani_value += float(successful_samples_list[i]) * float(ani_list[i])
            queries_value += float(successful_samples_list[i]) * float(queries_list[i])
            successful_samples_counts += float(successful_samples_list[i])
            correct_samples_counts += float(correctly_classified_samples_list[i])
        # Record the folder's aggregate only if something succeeded.
        if successful_samples_counts > 0:
            # Dataset name is the last path component of the directory.
            dataset_name = os.path.basename(root)
            acc = successful_samples_counts / correct_samples_counts
            ani = ani_value / successful_samples_counts
            mse = mse_value / successful_samples_counts
            queries = queries_value / successful_samples_counts
            results.append((dataset_name, acc, ani, mse, queries))
    if restore:
        df = pd.DataFrame(results, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])
        # Timestamped filename so repeated runs do not overwrite each other.
        stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        results_filename = f"analyze_result_{stamp}.csv"
        df.to_csv(results_filename, index=False)
    else:
        for dataset_name, acc, ani, mse, queries in results:
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')
    return results
def count_classes_samples(folder_path, select_colum='Success'):
    """
    Walk folder_path, parse every information* file, and count how often each
    (4th-column, 5th-column) integer pair occurs on rows whose 9th column
    equals select_colum.

    Args:
        select_colum (str): value the 9th column must equal (default 'Success').
        folder_path (str): root folder to analyze.

    Returns:
        list: rows of [col4_value, col5_value, count].
    """
    # Counter replaces the manual has-key/increment dict bookkeeping.
    key_pair_counts = Counter()
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            # Only information* files carry the records we need.
            if re.match(r'information\d', file):
                # errors='replace' keeps a stray undecodable byte from
                # aborting the whole scan (matches analyze_one_folder).
                with open(os.path.join(root, file), 'r', errors='replace') as f:
                    data = f.read()
                for line in data.strip().split('\n'):
                    fields = line.split()  # split once per line, not twice
                    # Need at least 9 columns for a valid record.
                    if len(fields) > 8 and fields[8] == select_colum:
                        key_pair_counts[(int(fields[3]), int(fields[4]))] += 1
    # Flatten {(a, b): n} into [[a, b, n], ...].
    return [[pair[0], pair[1], count] for pair, count in key_pair_counts.items()]
def generate_heat_maps_array(data, default_value=0, classes_size=None):
    """
    Build a 2-D array from (row, col, value) triples: for a triple like
    [6, 1, 1] the result has array[6][1] == 1.

    Args:
        classes_size (int | None): if given, force a classes_size x classes_size
            array; otherwise size it from the maximum row/col indices in data.
        data: list of triples (row index, column index, value).
        default_value: fill value for unset cells (default 0).

    Returns:
        list[list]: the populated 2-D array.
    """
    # Determine the array dimensions.
    if classes_size is not None:
        max_row = max_col = classes_size
    else:
        max_row = max(item[0] for item in data) + 1
        max_col = max(item[1] for item in data) + 1
    # Initialize with the default value.
    array = [[default_value for _ in range(max_col)] for _ in range(max_row)]
    # Set the specified cells.  BUGFIX: the original wrote array[col][row],
    # which contradicts the docstring and the max_row/max_col sizing above
    # and raises IndexError whenever the array is not square.
    for row, col, value in data:
        array[row][col] = value
    return array
if __name__ == "__main__":
    # Every experiment result folder lives under the same model_f directory;
    # build the full paths from the dataset names instead of repeating the
    # prefix 42 times.
    _RESULTS_ROOT = ("/Users/catb/Library/CloudStorage/CloudMounter-B40-4"
                     "/home/BJTU/project/CPadv/CCPP实验结果/model_f")
    _DATASETS = [
        "Car", "Wafer", "ItalyPowerDemand", "AllGestureWiimoteY",
        "NonIFECGTho2", "Trace", "PigAirwayPressure", "GesturePebbleZ2",
        "AllGestureWiimoteX", "FordA", "DodgerLoopDay",
        "SonyAIBORobotSurface1", "PigArtPressure", "MoteStrain",
        "ECGFiveDays", "PickupGeWZ", "FreezerSmallTrain", "TwoLeadECG",
        "Lightning7", "Phoneme", "DodgerLoopWeekend", "ECG5000",
        "ShakeGestureWiimoteZ", "GesturePebbleZ1", "NonIFECGTho1",
        "AllGestureWiimoteZ", "FordB", "InsectWingbeatSound",
        "EOGHorizontalSignal", "ChlorineConcentration", "Plane",
        "EOGVerticalSignal", "DodgerLoopGame", "ECG200", "Fungi", "CinCEC",
        "SonyAIBORobotSurface2", "Lightning2", "FreezerRegularTrain",
        "StarLightCurves", "Earthquakes", "PigCVP",
    ]
    experiment_folders = [f"{_RESULTS_ROOT}/{name}" for name in _DATASETS]
    # Analyze every folder and save all results into a single CSV file.
    results_df = analyze_multiple_folders(experiment_folders)
    # To inspect the results without writing a CSV, use:
    # results_df = analyze_multiple_folders(experiment_folders, restore=False)