118 lines
3.5 KiB
Python
118 lines
3.5 KiB
Python
import xlrd
|
||
import xlwt
|
||
import re
|
||
from datetime import datetime
|
||
|
||
|
||
def extract_time(cell_value):
|
||
"""从单元格内容中提取时间"""
|
||
if cell_value:
|
||
time_match = re.search(r'(\d{1,2}[::]\d{2})', str(cell_value))
|
||
if time_match:
|
||
return time_match.group(1).replace(':', ':')
|
||
return None
|
||
|
||
|
||
def is_airport(cell_value):
|
||
"""判断是否是机场到达(包含T1/T2/T3/机场/航站楼等关键词)"""
|
||
if not cell_value:
|
||
return False
|
||
return any(keyword in str(cell_value) for keyword in ['T1', 'T2', 'T3', '机场', '航站楼'])
|
||
|
||
|
||
def is_train_station(cell_value):
|
||
"""判断是否是火车站到达(包含站/高铁等关键词)"""
|
||
if not cell_value:
|
||
return False
|
||
return any(keyword in str(cell_value) for keyword in ['站', '高铁', 'G', '北站', '贵阳站'])
|
||
|
||
|
||
# 读取原始文件
|
||
input_file = "4_2/全国统计系统会议接送表(1)(1).xls"
|
||
workbook = xlrd.open_workbook(input_file)
|
||
sheet = workbook.sheet_by_index(0)
|
||
|
||
# 创建两个输出工作簿
|
||
airplane_workbook = xlwt.Workbook()
|
||
train_workbook = xlwt.Workbook()
|
||
|
||
# 创建工作表
|
||
airplane_sheet = airplane_workbook.add_sheet("飞机到达表")
|
||
train_sheet = train_workbook.add_sheet("火车到达表")
|
||
|
||
# 准备存储数据
|
||
airplane_data = []
|
||
train_data = []
|
||
|
||
# 处理表头
|
||
headers = [sheet.cell_value(row, col) for row in range(3) for col in range(sheet.ncols)]
|
||
header_row_count = 3 # 原表有3行表头
|
||
|
||
# 复制表头到两个工作表
|
||
for row in range(header_row_count):
|
||
for col in range(sheet.ncols):
|
||
airplane_sheet.write(row, col, sheet.cell_value(row, col))
|
||
train_sheet.write(row, col, sheet.cell_value(row, col))
|
||
|
||
|
||
# 处理数据行
|
||
for row in range(header_row_count, sheet.nrows):
|
||
row_data = [sheet.cell_value(row, col) for col in range(sheet.ncols)]
|
||
cell_value = sheet.cell_value(row, 5) # F列
|
||
extracted_time = extract_time(cell_value)
|
||
|
||
# 转换为时间对象用于排序
|
||
time_obj = None
|
||
if extracted_time:
|
||
try:
|
||
time_obj = datetime.strptime(extracted_time, "%H:%M").time()
|
||
except ValueError:
|
||
pass
|
||
|
||
# 添加提取的时间到行数据
|
||
row_data.append(extracted_time)
|
||
|
||
# 根据到达方式分类
|
||
if is_airport(cell_value):
|
||
airplane_data.append((time_obj, row_data))
|
||
elif is_train_station(cell_value):
|
||
train_data.append((time_obj, row_data))
|
||
else:
|
||
# 无法识别的到达方式,根据用户需求决定放在哪个表
|
||
# 这里默认放入火车到达表
|
||
train_data.append((time_obj, row_data))
|
||
|
||
|
||
# 排序函数
|
||
def sort_key(item):
|
||
time_obj, row_data = item
|
||
return (time_obj is not None, time_obj or datetime.min.time())
|
||
|
||
|
||
# 对两个数据集分别排序
|
||
airplane_data.sort(key=sort_key)
|
||
train_data.sort(key=sort_key)
|
||
|
||
# 写入飞机到达表
|
||
output_row = header_row_count
|
||
for time_obj, row_data in airplane_data:
|
||
for col in range(len(row_data)):
|
||
airplane_sheet.write(output_row, col, row_data[col])
|
||
output_row += 1
|
||
|
||
# 写入火车到达表
|
||
output_row = header_row_count
|
||
for time_obj, row_data in train_data:
|
||
for col in range(len(row_data)):
|
||
train_sheet.write(output_row, col, row_data[col])
|
||
output_row += 1
|
||
|
||
# 保存文件
|
||
airplane_file = "飞机到达表.xls"
|
||
train_file = "火车到达表.xls"
|
||
|
||
airplane_workbook.save(airplane_file)
|
||
train_workbook.save(train_file)
|
||
|
||
print(f"处理完成,飞机到达表已保存至: {airplane_file}")
|
||
print(f"处理完成,火车到达表已保存至: {train_file}") |