shatian_excel/split_sheet.py
2025-04-04 16:48:59 +08:00

118 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import xlrd
import xlwt
import re
from datetime import datetime
def extract_time(cell_value):
"""从单元格内容中提取时间"""
if cell_value:
time_match = re.search(r'(\d{1,2}[:]\d{2})', str(cell_value))
if time_match:
return time_match.group(1).replace('', ':')
return None
def is_airport(cell_value):
"""判断是否是机场到达包含T1/T2/T3/机场/航站楼等关键词)"""
if not cell_value:
return False
return any(keyword in str(cell_value) for keyword in ['T1', 'T2', 'T3', '机场', '航站楼'])
def is_train_station(cell_value):
"""判断是否是火车站到达(包含站/高铁等关键词)"""
if not cell_value:
return False
return any(keyword in str(cell_value) for keyword in ['', '高铁', 'G', '北站', '贵阳站'])
# 读取原始文件
input_file = "4_2/全国统计系统会议接送表(1)(1).xls"
workbook = xlrd.open_workbook(input_file)
sheet = workbook.sheet_by_index(0)
# 创建两个输出工作簿
airplane_workbook = xlwt.Workbook()
train_workbook = xlwt.Workbook()
# 创建工作表
airplane_sheet = airplane_workbook.add_sheet("飞机到达表")
train_sheet = train_workbook.add_sheet("火车到达表")
# 准备存储数据
airplane_data = []
train_data = []
# 处理表头
headers = [sheet.cell_value(row, col) for row in range(3) for col in range(sheet.ncols)]
header_row_count = 3 # 原表有3行表头
# 复制表头到两个工作表
for row in range(header_row_count):
for col in range(sheet.ncols):
airplane_sheet.write(row, col, sheet.cell_value(row, col))
train_sheet.write(row, col, sheet.cell_value(row, col))
# 处理数据行
for row in range(header_row_count, sheet.nrows):
row_data = [sheet.cell_value(row, col) for col in range(sheet.ncols)]
cell_value = sheet.cell_value(row, 5) # F列
extracted_time = extract_time(cell_value)
# 转换为时间对象用于排序
time_obj = None
if extracted_time:
try:
time_obj = datetime.strptime(extracted_time, "%H:%M").time()
except ValueError:
pass
# 添加提取的时间到行数据
row_data.append(extracted_time)
# 根据到达方式分类
if is_airport(cell_value):
airplane_data.append((time_obj, row_data))
elif is_train_station(cell_value):
train_data.append((time_obj, row_data))
else:
# 无法识别的到达方式,根据用户需求决定放在哪个表
# 这里默认放入火车到达表
train_data.append((time_obj, row_data))
# 排序函数
def sort_key(item):
time_obj, row_data = item
return (time_obj is not None, time_obj or datetime.min.time())
# 对两个数据集分别排序
airplane_data.sort(key=sort_key)
train_data.sort(key=sort_key)
# 写入飞机到达表
output_row = header_row_count
for time_obj, row_data in airplane_data:
for col in range(len(row_data)):
airplane_sheet.write(output_row, col, row_data[col])
output_row += 1
# 写入火车到达表
output_row = header_row_count
for time_obj, row_data in train_data:
for col in range(len(row_data)):
train_sheet.write(output_row, col, row_data[col])
output_row += 1
# 保存文件
airplane_file = "飞机到达表.xls"
train_file = "火车到达表.xls"
airplane_workbook.save(airplane_file)
train_workbook.save(train_file)
print(f"处理完成,飞机到达表已保存至: {airplane_file}")
print(f"处理完成,火车到达表已保存至: {train_file}")