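# Utilities for summarising "information*" result files: per-folder weighted
# metric aggregation, class-pair counting, and heat-map array construction.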
import os
|
||
import re
|
||
from datetime import datetime
|
||
|
||
import pandas as pd
|
||
|
||
|
||
def analyze_info_tool(info_index=1, pattern=r'result_\d+\.\d+_\d+_*', parent_folder='./'):
    """
    Run ``analyze_one_folder`` on every entry of ``parent_folder`` whose name
    matches ``pattern``.

    Args:
        info_index (int): Forwarded to ``analyze_one_folder`` as the index of
            the record to read from each information file.
        pattern (str): Regular expression applied with ``re.match`` (i.e.
            anchored at the start of the name) to each directory entry,
            e.g. it accepts ``result_0.04_9_r``.
        parent_folder (str): Directory whose entries are inspected.

    Returns:
        None. The per-folder results are not collected here.
    """
    for entry in os.listdir(parent_folder):
        # Only names shaped like a result folder are analysed.
        if not re.match(pattern, entry):
            continue
        analyze_one_folder(info_index, os.path.join(parent_folder, entry))
|
||
|
||
|
||
def analyze_multiple_folders(folder_paths, info_index=1, restore=True):
    """
    Analyse several folders and gather all of their result rows in one table.

    Args:
        folder_paths (list): Paths of the folders to analyse.
        info_index (int): Which record of each information file to use
            (forwarded to ``analyze_one_folder``). Defaults to 1.
        restore (bool): When True (default) the table is written to a
            time-stamped ``multiple_folders_analysis_*.csv`` file in the
            current working directory; when False the rows are printed.

    Returns:
        pandas.DataFrame: One row per result with the columns
        ``Dataset``, ``Accuracy``, ``ANI``, ``MSE`` and ``Queries``.
    """
    # Flatten the per-folder row lists into a single list, folder by folder.
    all_results = [
        row
        for folder_path in folder_paths
        for row in analyze_one_folder(info_index, folder_path, restore=False)
    ]

    df = pd.DataFrame(all_results, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])

    if not restore:
        # Print-only mode: dump every row in a readable form.
        for dataset_name, acc, ani, mse, queries in all_results:
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')
        return df

    # The file name embeds the current local date and time.
    now = datetime.now()
    results_filename = f"multiple_folders_analysis_{now.year}-{now.month:02d}-{now.day:02d}-{now.hour:02d}-{now.minute:02d}-{now.second:02d}.csv"
    df.to_csv(results_filename, index=False)
    print(f"Results saved to {results_filename}")
    return df
|
||
|
||
def analyze_one_folder(info_index=1, folder_path='./', restore=False):
    """
    Aggregate the metrics stored in ``information*`` files below a folder.

    The tree under ``folder_path`` is walked. Every directory that directly
    contains matching files contributes at most one result row: the
    ``info_index``-th record of each file is read, and the per-file ANI, MSE
    and mean-queries values are averaged, weighted by each file's number of
    successful samples.

    Args:
        info_index (int): 1-based position of the record to take from each
            information file. Files holding fewer records are ignored.
        folder_path (str): Root folder to analyse. Defaults to ``'./'``.
        restore (bool): If True, write the rows to a time-stamped
            ``analyze_result_*.csv`` file in the current working directory.
            If False (default), print the rows instead.

    Returns:
        list: One ``(dataset_name, acc, ani, mse, queries)`` tuple per
        directory that has at least one successful sample. ``dataset_name`` is
        the last component of the directory path and ``acc`` is the number of
        successful samples divided by the number of correctly-classified
        samples.
    """
    results = []

    for root, dirs, files in os.walk(folder_path):
        # Prune figure folders in place so os.walk never descends into them.
        # (The previous check used the glob-like pattern r'\*/figures\d+',
        # which demands a literal '*' and therefore never matched.)
        # Sorting makes the traversal, and hence the row order, deterministic.
        dirs[:] = sorted(d for d in dirs if not re.fullmatch(r'figures\d*', d))

        records = []
        for file in files:
            if not re.match(r'information\d', file):
                continue
            with open(os.path.join(root, file), 'r', errors='replace') as f:
                record = _read_information_record(f.read(), info_index)
            if record is not None:
                records.append(record)

        summary = _summarise_information_records(records)
        if summary is None:
            continue

        # abspath strips a trailing separator (e.g. the default './'), which
        # would otherwise make basename() return an empty dataset name.
        dataset_name = os.path.basename(os.path.abspath(root))
        results.append((dataset_name,) + summary)

    if restore:
        df = pd.DataFrame(results, columns=['Dataset', 'Accuracy', 'ANI', 'MSE', 'Queries'])
        # The file name embeds the current local date and time.
        now = datetime.now()
        results_filename = f"analyze_result_{now.year}-{now.month:02d}-{now.day:02d}-{now.hour:02d}-{now.minute:02d}-{now.second:02d}.csv"
        df.to_csv(results_filename, index=False)
    else:
        for dataset_name, acc, ani, mse, queries in results:
            print(f'{dataset_name}\n'
                  f'\t Acc:{acc}\n'
                  f'\t ANI:{ani}\n'
                  f'\t MSE:{mse}\n'
                  f'\t Que:{queries}\n')

    return results


def _read_information_record(text, info_index):
    """Return the ``info_index``-th value of every metric in ``text``.

    The result is ``(successful, correct, ani, mse, queries)`` as floats, or
    None when ``info_index`` is below 1 or any metric occurs fewer than
    ``info_index`` times.
    """
    if info_index < 1:
        return None

    # Accept plain decimals, integers and scientific notation. Matching only
    # r'\d+\.\d+' would read '1.5e-05' as 1.5 and skip integer-formatted values.
    number = r'(\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)'
    patterns = (
        r'Successful samples:\s*(\d+)',
        r'Correctly-classified samples:\s*(\d+)',
        r'ANI:' + number,
        r'MSE:' + number,
        r'Mean queries:' + number,
    )

    values = []
    for pattern in patterns:
        found = re.findall(pattern, text)
        if len(found) < info_index:
            return None
        values.append(float(found[info_index - 1]))
    return tuple(values)


def _summarise_information_records(records):
    """Combine per-file records into ``(acc, ani, mse, queries)``.

    ANI, MSE and queries are means weighted by each record's successful-sample
    count. Returns None when there is no successful sample at all.
    """
    successful_total = sum(record[0] for record in records)
    if successful_total <= 0:
        return None

    correct_total = sum(record[1] for record in records)
    ani_total = sum(record[0] * record[2] for record in records)
    mse_total = sum(record[0] * record[3] for record in records)
    queries_total = sum(record[0] * record[4] for record in records)

    # A zero denominator can only come from malformed input; report NaN
    # rather than aborting the whole analysis.
    acc = successful_total / correct_total if correct_total else float('nan')
    return (
        acc,
        ani_total / successful_total,
        mse_total / successful_total,
        queries_total / successful_total,
    )
|
||
|
||
def count_classes_samples(folder_path, select_colum='Success'):
    """
    Count (4th column, 5th column) pairs in all ``information*`` files under
    ``folder_path``.

    Each file is split into whitespace-separated rows. Only rows with at least
    nine columns whose 9th column equals ``select_colum`` are counted.

    :param folder_path: root directory that is walked recursively
    :param select_colum: value the 9th column must have for a row to count
        (defaults to ``'Success'``)
    :return: list of ``[col4, col5, count]`` lists, where ``col4`` and
        ``col5`` are the integer values of the 4th and 5th columns
    :raises ValueError: if a selected row has a non-integer 4th or 5th column
    """
    key_pair_counts = {}

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            # Only files named information<digit>... are parsed.
            if not re.match(r'information\d', file):
                continue

            # errors='replace' matches how analyze_one_folder reads the same
            # files, so stray undecodable bytes cannot abort the count.
            with open(os.path.join(root, file), 'r', errors='replace') as f:
                data = f.read()

            for line in data.strip().split('\n'):
                record = line.split()
                # Need at least nine columns to reach the status column.
                if len(record) <= 8 or record[8] != select_colum:
                    continue
                key_pair = (int(record[3]), int(record[4]))
                key_pair_counts[key_pair] = key_pair_counts.get(key_pair, 0) + 1

    # Flatten {(a, b): n} into [[a, b, n], ...].
    return [[first, second, count] for (first, second), count in key_pair_counts.items()]
|
||
|
||
|
||
def generate_heat_maps_array(data, default_value=0, classes_size=None):
    """
    Build a 2-D list from ``(first, second, value)`` triples.

    For every triple the cell ``array[second][first]`` is set to ``value``;
    e.g. ``[6, 1, 1]`` sets ``array[1][6] = 1``. All other cells hold
    ``default_value``.

    NOTE(review): an earlier description of this function said ``array[6][1]``
    would be set for ``[6, 1, 1]``. The code has always written
    ``array[second][first]``, and that behaviour is kept here so existing
    output is unchanged. Confirm which orientation consumers expect.

    :param data: iterable of 3-item sequences of integers
        ``(first index, second index, value)``
    :param default_value: fill value for cells not named in ``data``
    :param classes_size: if given, the result is a
        ``classes_size x classes_size`` grid; otherwise the size is inferred
        from the largest indices in ``data``
    :return: list of lists; empty list when ``data`` is empty and
        ``classes_size`` is None
    """
    data = list(data)

    if classes_size is not None:
        n_first = n_second = classes_size
    elif not data:
        # Nothing to infer a size from.
        return []
    else:
        n_first = max(item[0] for item in data) + 1
        n_second = max(item[1] for item in data) + 1

    # The outer index is the triple's second element, so the outer dimension
    # must be sized by it. Allocating it the other way round raised IndexError
    # for every non-square inferred size.
    array = [[default_value for _ in range(n_first)] for _ in range(n_second)]

    for first, second, value in data:
        array[second][first] = value

    return array
|
||
|
||
|
||
if __name__ == "__main__":
    # Root folder holding one sub-folder of experiment results per dataset.
    # Kept in one place so it only has to be edited once when the data moves.
    results_root = "/Users/catb/Library/CloudStorage/CloudMounter-B40-4/home/BJTU/project/CPadv/CCPP实验结果/model_f"

    # Dataset sub-folders to analyse, in the order they appear in the output.
    dataset_names = [
        "Car",
        "Wafer",
        "ItalyPowerDemand",
        "AllGestureWiimoteY",
        "NonIFECGTho2",
        "Trace",
        "PigAirwayPressure",
        "GesturePebbleZ2",
        "AllGestureWiimoteX",
        "FordA",
        "DodgerLoopDay",
        "SonyAIBORobotSurface1",
        "PigArtPressure",
        "MoteStrain",
        "ECGFiveDays",
        "PickupGeWZ",
        "FreezerSmallTrain",
        "TwoLeadECG",
        "Lightning7",
        "Phoneme",
        "DodgerLoopWeekend",
        "ECG5000",
        "ShakeGestureWiimoteZ",
        "GesturePebbleZ1",
        "NonIFECGTho1",
        "AllGestureWiimoteZ",
        "FordB",
        "InsectWingbeatSound",
        "EOGHorizontalSignal",
        "ChlorineConcentration",
        "Plane",
        "EOGVerticalSignal",
        "DodgerLoopGame",
        "ECG200",
        "Fungi",
        "CinCEC",
        "SonyAIBORobotSurface2",
        "Lightning2",
        "FreezerRegularTrain",
        "StarLightCurves",
        "Earthquakes",
        "PigCVP",
    ]

    # Joined with '/' explicitly so the paths are exactly "<root>/<dataset>".
    experiment_folders = [f"{results_root}/{name}" for name in dataset_names]

    # Analyse every folder and save all rows to a single CSV file.
    results_df = analyze_multiple_folders(experiment_folders)

    # To inspect the results without writing a CSV, use instead:
    # results_df = analyze_multiple_folders(experiment_folders, restore=False)