import xlrd import xlwt import re from datetime import datetime def extract_time(cell_value): """从单元格内容中提取时间""" if cell_value: time_match = re.search(r'(\d{1,2}[::]\d{2})', str(cell_value)) if time_match: return time_match.group(1).replace(':', ':') return None def is_airport(cell_value): """判断是否是机场到达(包含T1/T2/T3/机场/航站楼等关键词)""" if not cell_value: return False return any(keyword in str(cell_value) for keyword in ['T1', 'T2', 'T3', '机场', '航站楼']) def is_train_station(cell_value): """判断是否是火车站到达(包含站/高铁等关键词)""" if not cell_value: return False return any(keyword in str(cell_value) for keyword in ['站', '高铁', 'G', '北站', '贵阳站']) # 读取原始文件 input_file = "4_2/全国统计系统会议接送表(1)(1).xls" workbook = xlrd.open_workbook(input_file) sheet = workbook.sheet_by_index(0) # 创建两个输出工作簿 airplane_workbook = xlwt.Workbook() train_workbook = xlwt.Workbook() # 创建工作表 airplane_sheet = airplane_workbook.add_sheet("飞机到达表") train_sheet = train_workbook.add_sheet("火车到达表") # 准备存储数据 airplane_data = [] train_data = [] # 处理表头 headers = [sheet.cell_value(row, col) for row in range(3) for col in range(sheet.ncols)] header_row_count = 3 # 原表有3行表头 # 复制表头到两个工作表 for row in range(header_row_count): for col in range(sheet.ncols): airplane_sheet.write(row, col, sheet.cell_value(row, col)) train_sheet.write(row, col, sheet.cell_value(row, col)) # 处理数据行 for row in range(header_row_count, sheet.nrows): row_data = [sheet.cell_value(row, col) for col in range(sheet.ncols)] cell_value = sheet.cell_value(row, 5) # F列 extracted_time = extract_time(cell_value) # 转换为时间对象用于排序 time_obj = None if extracted_time: try: time_obj = datetime.strptime(extracted_time, "%H:%M").time() except ValueError: pass # 添加提取的时间到行数据 row_data.append(extracted_time) # 根据到达方式分类 if is_airport(cell_value): airplane_data.append((time_obj, row_data)) elif is_train_station(cell_value): train_data.append((time_obj, row_data)) else: # 无法识别的到达方式,根据用户需求决定放在哪个表 # 这里默认放入火车到达表 train_data.append((time_obj, row_data)) # 排序函数 def sort_key(item): time_obj, row_data = item return (time_obj is not None, time_obj or datetime.min.time()) # 对两个数据集分别排序 airplane_data.sort(key=sort_key) train_data.sort(key=sort_key) # 写入飞机到达表 output_row = header_row_count for time_obj, row_data in airplane_data: for col in range(len(row_data)): airplane_sheet.write(output_row, col, row_data[col]) output_row += 1 # 写入火车到达表 output_row = header_row_count for time_obj, row_data in train_data: for col in range(len(row_data)): train_sheet.write(output_row, col, row_data[col]) output_row += 1 # 保存文件 airplane_file = "飞机到达表.xls" train_file = "火车到达表.xls" airplane_workbook.save(airplane_file) train_workbook.save(train_file) print(f"处理完成,飞机到达表已保存至: {airplane_file}") print(f"处理完成,火车到达表已保存至: {train_file}")