shatian_excel/split_sheet.py

import xlrd
import xlwt
import re
from datetime import datetime


def extract_time(cell_value):
    """从单元格内容中提取时间"""
    if cell_value:
        time_match = re.search(r'(\d{1,2}[:：]\d{2})', str(cell_value))
        if time_match:
            return time_match.group(1).replace('：', ':')
    return None


def is_airport(cell_value):
    """判断是否是机场到达（包含T1/T2/T3/机场/航站楼等关键词）"""
    if not cell_value:
        return False
    return any(keyword in str(cell_value) for keyword in ['T1', 'T2', 'T3', '机场', '航站楼'])


def is_train_station(cell_value):
    """判断是否是火车站到达（包含站/高铁等关键词）"""
    if not cell_value:
        return False
    return any(keyword in str(cell_value) for keyword in ['站', '高铁', 'G', '北站', '贵阳站'])


# 读取原始文件
input_file = "4_2/全国统计系统会议接送表(1)(1).xls"
workbook = xlrd.open_workbook(input_file)
sheet = workbook.sheet_by_index(0)

# 创建两个输出工作簿
airplane_workbook = xlwt.Workbook()
train_workbook = xlwt.Workbook()

# 创建工作表
airplane_sheet = airplane_workbook.add_sheet("飞机到达表")
train_sheet = train_workbook.add_sheet("火车到达表")

# 准备存储数据
airplane_data = []
train_data = []

# 处理表头
headers = [sheet.cell_value(row, col) for row in range(3) for col in range(sheet.ncols)]
header_row_count = 3  # 原表有3行表头

# 复制表头到两个工作表
for row in range(header_row_count):
    for col in range(sheet.ncols):
        airplane_sheet.write(row, col, sheet.cell_value(row, col))
        train_sheet.write(row, col, sheet.cell_value(row, col))


# 处理数据行
for row in range(header_row_count, sheet.nrows):
    row_data = [sheet.cell_value(row, col) for col in range(sheet.ncols)]
    cell_value = sheet.cell_value(row, 5)  # F列
    extracted_time = extract_time(cell_value)

    # 转换为时间对象用于排序
    time_obj = None
    if extracted_time:
        try:
            time_obj = datetime.strptime(extracted_time, "%H:%M").time()
        except ValueError:
            pass

    # 添加提取的时间到行数据
    row_data.append(extracted_time)

    # 根据到达方式分类
    if is_airport(cell_value):
        airplane_data.append((time_obj, row_data))
    elif is_train_station(cell_value):
        train_data.append((time_obj, row_data))
    else:
        # 无法识别的到达方式，根据用户需求决定放在哪个表
        # 这里默认放入火车到达表
        train_data.append((time_obj, row_data))


# 排序函数
def sort_key(item):
    time_obj, row_data = item
    return (time_obj is not None, time_obj or datetime.min.time())


# 对两个数据集分别排序
airplane_data.sort(key=sort_key)
train_data.sort(key=sort_key)

# 写入飞机到达表
output_row = header_row_count
for time_obj, row_data in airplane_data:
    for col in range(len(row_data)):
        airplane_sheet.write(output_row, col, row_data[col])
    output_row += 1

# 写入火车到达表
output_row = header_row_count
for time_obj, row_data in train_data:
    for col in range(len(row_data)):
        train_sheet.write(output_row, col, row_data[col])
    output_row += 1

# 保存文件
airplane_file = "飞机到达表.xls"
train_file = "火车到达表.xls"

airplane_workbook.save(airplane_file)
train_workbook.save(train_file)

print(f"处理完成，飞机到达表已保存至: {airplane_file}")
print(f"处理完成，火车到达表已保存至: {train_file}")