# shatian_excel/extract_time.py

import xlrd
from openpyxl import Workbook
import re

# Source .xls workbook and the .xlsx copy that receives the extracted times.
INPUT_FILE = "全国统计系统会议接送表.xls"
OUTPUT_FILE = "全国统计系统会议接送表_更新.xlsx"

FIRST_DATA_ROW = 3    # 0-based xlrd row index; data starts on sheet row 4
TIME_SOURCE_COL = 5   # 0-based xlrd column index of column F
TIME_TARGET_COL = 13  # 1-based openpyxl column index of column M

SECONDS_PER_DAY = 86400

# H:MM or HH:MM written with either an ASCII or a full-width colon,
# e.g. "14:25" or "16：35".
TIME_PATTERN = re.compile(r'(\d{1,2}[:：]\d{2})')


def extract_time(value, is_date=False):
    """Return the time of day found in a cell value, or None.

    Args:
        value: The raw cell value as returned by ``sheet.cell_value``.
        is_date: True when the cell is an Excel date/time cell, in which
            case ``value`` is a float serial (days, with the time of day
            in the fractional part) rather than text.

    Returns:
        For text: the first ``H:MM``/``HH:MM`` match with a full-width
        colon normalized to ``:``. For date cells: a zero-padded
        ``HH:MM`` string. None when no time is present.
    """
    if is_date:
        fraction = value % 1
        # A whole-number serial >= 1 is a pure date with no time of day;
        # do not invent "00:00" for it. A serial of exactly 0 is a
        # time-only cell holding midnight.
        if fraction == 0 and value >= 1:
            return None
        # Round to whole seconds to absorb float error (14:25 is stored
        # as 0.600694...), wrapping a round-up to 24:00:00 back to 00:00.
        seconds = int(round(fraction * SECONDS_PER_DAY)) % SECONDS_PER_DAY
        hours, remainder = divmod(seconds, 3600)
        return f"{hours:02d}:{remainder // 60:02d}"

    if not value:
        return None
    found = TIME_PATTERN.search(str(value))
    if found is None:
        return None
    return found.group(1).replace('：', ':')


def main():
    """Copy the first sheet to a new .xlsx and fill column M with times.

    Reads INPUT_FILE with xlrd, copies every cell value of its first
    sheet into a fresh openpyxl workbook, writes the time extracted from
    column F into column M for each data row, and saves the result to
    OUTPUT_FILE.
    """
    workbook = xlrd.open_workbook(INPUT_FILE)
    sheet = workbook.sheet_by_index(0)  # first worksheet

    output_workbook = Workbook()
    output_sheet = output_workbook.active

    # Copy cell values only; fonts, number formats, merged cells and
    # column widths are NOT carried over to the new file.
    for row in range(sheet.nrows):
        for col in range(sheet.ncols):
            output_sheet.cell(
                row=row + 1,
                column=col + 1,
                value=sheet.cell_value(row, col),
            )

    for row in range(FIRST_DATA_ROW, sheet.nrows):
        # xlrd reports real Excel time/date cells as float serials, which
        # the text pattern can never match, so they are handled by type.
        is_date = sheet.cell_type(row, TIME_SOURCE_COL) == xlrd.XL_CELL_DATE
        extracted_time = extract_time(
            sheet.cell_value(row, TIME_SOURCE_COL), is_date=is_date
        )
        if extracted_time is not None:
            # openpyxl rows are 1-based, xlrd rows are 0-based.
            output_sheet.cell(
                row=row + 1, column=TIME_TARGET_COL, value=extracted_time
            )

    output_workbook.save(OUTPUT_FILE)
    print(f"处理完成，结果已保存至: {OUTPUT_FILE}")


if __name__ == "__main__":
    main()