shatian_excel/sort_time.py

import xlrd
import xlwt
import re
from datetime import datetime

# 读取 .xls 文件
input_file = "全国统计系统会议接送表.xls"
workbook = xlrd.open_workbook(input_file)
sheet = workbook.sheet_by_index(0)

# 创建新的 .xls 文件
output_workbook = xlwt.Workbook()
output_sheet = output_workbook.add_sheet("Sheet1")

# 复制表头
for col in range(sheet.ncols):
    output_sheet.write(0, col, sheet.cell_value(0, col))
    output_sheet.write(1, col, sheet.cell_value(1, col))
    output_sheet.write(2, col, sheet.cell_value(2, col))

# 添加新列标题"到达时间"到M列
output_sheet.write(2, 12, "到达时间")

# 准备数据行并提取时间
data_rows = []
for row in range(3, sheet.nrows):
    row_data = [sheet.cell_value(row, col) for col in range(sheet.ncols)]
    cell_value = sheet.cell_value(row, 5)  # F列是第6列（xlrd列索引从0开始）
    extracted_time = None

    if cell_value:
        time_match = re.search(r'(\d{1,2}[:：]\d{2})', str(cell_value))
        if time_match:
            extracted_time = time_match.group(1).replace('：', ':')
            # 转换为时间对象便于排序
            try:
                time_obj = datetime.strptime(extracted_time, "%H:%M").time()
            except ValueError:
                time_obj = None
        else:
            time_obj = None
    else:
        time_obj = None

    row_data.append(extracted_time)  # 添加提取的时间字符串到行数据
    data_rows.append((time_obj, row, row_data))  # 存储时间对象、原行号和行数据

# 排序：先按时间排序，没有时间的放在最后
data_rows.sort(key=lambda x: (x[0] is not None, x[0] or datetime.min.time()))

# 写入排序后的数据
output_row = 3
for time_obj, original_row, row_data in data_rows:
    for col in range(len(row_data)):
        output_sheet.write(output_row, col, row_data[col])
    output_row += 1

# 保存
output_file = "全国统计系统会议接送表_排序.xls"
output_workbook.save(output_file)
print(f"处理完成，结果已保存至: {output_file}")