init
This commit is contained in:
commit
18b3ee16e3
53
compare_colume.py
Normal file
53
compare_colume.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def compare_excel_columns(file_a, file_b, sheet_a=0, sheet_b=0,
                          col_index_a=5, col_index_b=5, start_row=3):
    """Check whether one column of two Excel sheets holds identical data.

    Both workbooks are read without a header row, the requested column is
    sliced from ``start_row`` downwards, and the two slices are compared
    position by position. The verdict and every differing row are printed.

    Args:
        file_a: Path of workbook A.
        file_b: Path of workbook B.
        sheet_a: Sheet index or name in workbook A (default: first sheet).
        sheet_b: Sheet index or name in workbook B (default: first sheet).
        col_index_a: Zero-based column position read from workbook A.
            Defaults to 5, i.e. Excel column F.
        col_index_b: Zero-based column position read from workbook B.
            Defaults to 5, i.e. Excel column F.
        start_row: Zero-based index of the first compared row. Defaults to
            3, i.e. Excel row 4.

    Returns:
        True when both slices have the same length and every pair of cells
        matches (two empty cells count as a match). False on any
        difference, and also when reading or comparing raises: the error
        message is printed instead of being propagated.

    NOTE(review): both defaults read column F. If workbook B keeps its data
    in column G, call with ``col_index_b=6`` -- confirm against the files.
    """
    try:
        # header=None keeps positional indices aligned with Excel rows/columns.
        df_a = pd.read_excel(file_a, sheet_name=sheet_a, header=None)
        df_b = pd.read_excel(file_b, sheet_name=sheet_b, header=None)

        # reset_index makes both slices start at 0 so they align on compare.
        col_a = df_a.iloc[start_row:, col_index_a].reset_index(drop=True)
        col_b = df_b.iloc[start_row:, col_index_b].reset_index(drop=True)

        if len(col_a) != len(col_b):
            print(f"数据长度不一致: 表A有{len(col_a)}行,表B有{len(col_b)}行")
            return False

        # Empty cells are read as NaN and NaN != NaN, so a plain == would
        # flag rows that are blank in both sheets as differences.
        same = (col_a == col_b) | (col_a.isna() & col_b.isna())
        if same.all():
            print("两列数据完全相同")
            return True

        diff_indices = same.index[~same]
        print(f"发现{len(diff_indices)}处不同:")
        # Convert the 0-based slice position back to a 1-based Excel row.
        first_excel_row = start_row + 1
        for idx in diff_indices:
            print(f"行 {idx + first_excel_row}: 表A值='{col_a[idx]}', 表B值='{col_b[idx]}'")
        return False

    except Exception as e:
        # Deliberate best-effort boundary: callers only receive a boolean.
        print(f"发生错误: {str(e)}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    # Replace these with the paths of workbook A and workbook B.
    workbook_paths = (
        "4_2/全国统计系统会议接送表 准1 - 副本(1).xls",
        "4_2/(4.2)全国统计系统办公室工作会议参会人员报名表 - 副本(1).xls",
    )

    is_same = compare_excel_columns(*workbook_paths)
    print("对比结果:", "相同" if is_same else "不同")
|
32
extract_time.py
Normal file
32
extract_time.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import xlrd
|
||||||
|
from openpyxl import Workbook
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Read the source .xls file.
input_file = "全国统计系统会议接送表.xls"
workbook = xlrd.open_workbook(input_file)
sheet = workbook.sheet_by_index(0)  # first worksheet

# Create a new .xlsx workbook that receives the output.
output_workbook = Workbook()
output_sheet = output_workbook.active

# Copy every cell value into the new workbook. Only values are copied, not
# cell formatting. xlrd indices are 0-based while openpyxl's are 1-based.
for row in range(sheet.nrows):
    for col in range(sheet.ncols):
        output_sheet.cell(row=row + 1, column=col + 1, value=sheet.cell_value(row, col))

# Matches times such as 14:25 or 16:35, written with either an ASCII colon
# or a full-width colon (U+FF1A). Escapes are used so the two visually
# similar characters cannot be confused in the source.
time_pattern = re.compile(r'(\d{1,2}[:\uff1a]\d{2})')

# Extract the time from column F and write it to column M.
for row in range(3, sheet.nrows):  # data starts at Excel row 4 (index 3)
    cell_value = sheet.cell_value(row, 5)  # column F is index 5 in xlrd
    if not cell_value:
        continue
    time_match = time_pattern.search(str(cell_value))
    if time_match:
        # Normalize a full-width colon to ASCII so every time reads H:MM.
        extracted_time = time_match.group(1).replace('\uff1a', ':')
        output_sheet.cell(row=row + 1, column=13, value=extracted_time)  # column M

# Save the result as a new .xlsx file.
output_file = "全国统计系统会议接送表_更新.xlsx"
output_workbook.save(output_file)
print(f"处理完成,结果已保存至: {output_file}")
|
61
sort_time.py
Normal file
61
sort_time.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
import xlrd
|
||||||
|
import xlwt
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Read the source .xls file.
input_file = "全国统计系统会议接送表.xls"
workbook = xlrd.open_workbook(input_file)
sheet = workbook.sheet_by_index(0)

# Create the output .xls workbook.
output_workbook = xlwt.Workbook()
output_sheet = output_workbook.add_sheet("Sheet1")

header_row_count = 3  # rows 0-2 are copied verbatim as the header

# Column that receives the extracted time: M (index 12) when it is free,
# otherwise the first column after the copied data. Using one column for
# both the title and the values keeps them aligned, and never writing a
# cell twice avoids xlwt's overwrite error (assumes the default
# cell_overwrite_ok=False -- see the xlwt docs).
time_col = max(sheet.ncols, 12)

# Copy the header rows.
for header_row in range(header_row_count):
    for col in range(sheet.ncols):
        output_sheet.write(header_row, col, sheet.cell_value(header_row, col))

# Title of the new arrival-time column, placed on the last header row.
output_sheet.write(header_row_count - 1, time_col, "到达时间")

# Matches times such as 14:25, with an ASCII or full-width (U+FF1A) colon.
time_pattern = re.compile(r'(\d{1,2}[:\uff1a]\d{2})')

# Collect the data rows together with the time found in column F.
data_rows = []
for row in range(header_row_count, sheet.nrows):
    row_data = [sheet.cell_value(row, col) for col in range(sheet.ncols)]
    cell_value = sheet.cell_value(row, 5)  # column F is index 5 in xlrd
    extracted_time = None
    time_obj = None

    if cell_value:
        time_match = time_pattern.search(str(cell_value))
        if time_match:
            # Normalize a full-width colon so strptime can parse the text.
            extracted_time = time_match.group(1).replace('\uff1a', ':')
            try:
                time_obj = datetime.strptime(extracted_time, "%H:%M").time()
            except ValueError:
                # Looks like a time but is out of range (e.g. 25:70): keep
                # the text, but sort the row as if it had no time.
                time_obj = None

    data_rows.append((time_obj, row_data, extracted_time))

# Sort by time; rows without a usable time go last. False sorts before
# True, so the flag must be "time is missing". The sort is stable, so rows
# with equal keys keep their original order.
data_rows.sort(
    key=lambda entry: (
        entry[0] is None,
        entry[0] if entry[0] is not None else datetime.min.time(),
    )
)

# Write the sorted rows below the header.
for output_row, (_, row_data, extracted_time) in enumerate(data_rows, start=header_row_count):
    for col, value in enumerate(row_data):
        output_sheet.write(output_row, col, value)
    output_sheet.write(output_row, time_col, extracted_time)

# Save.
output_file = "全国统计系统会议接送表_排序.xls"
output_workbook.save(output_file)
print(f"处理完成,结果已保存至: {output_file}")
|
118
split_sheet.py
Normal file
118
split_sheet.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
import xlrd
|
||||||
|
import xlwt
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def extract_time(cell_value):
    """Extract the first ``H:MM`` / ``HH:MM`` time found in a cell value.

    The value is converted to text and searched for one or two digits, a
    colon, and two digits. Both the ASCII colon and the full-width colon
    (U+FF1A) are accepted; the result always uses the ASCII colon.

    Args:
        cell_value: Cell content of any type; falsy values yield None.

    Returns:
        The matched time as a string such as ``"14:25"``, or None when the
        value is empty or contains no time. The digits are not range
        checked here.
    """
    if not cell_value:
        return None
    # Escapes keep the two visually similar colon characters unambiguous.
    time_match = re.search(r'(\d{1,2}[:\uff1a]\d{2})', str(cell_value))
    if time_match is None:
        return None
    return time_match.group(1).replace('\uff1a', ':')
|
||||||
|
|
||||||
|
|
||||||
|
def is_airport(cell_value):
    """Tell whether a cell describes an arrival at an airport.

    The value is converted to text and searched for any of the markers
    T1, T2, T3, 机场 or 航站楼. A falsy value is never an airport arrival.
    """
    if cell_value:
        text = str(cell_value)
        for marker in ('T1', 'T2', 'T3', '机场', '航站楼'):
            if marker in text:
                return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_train_station(cell_value):
    """Tell whether a cell describes an arrival at a train station.

    The value is converted to text and searched for any of the markers
    站, 高铁, G, 北站 or 贵阳站. A falsy value is never a train arrival.
    Note that 航站楼 (airport terminal) also contains 站, so this check
    alone cannot tell terminals and stations apart.
    """
    text = str(cell_value) if cell_value else ""
    markers = ('站', '高铁', 'G', '北站', '贵阳站')
    return any(map(text.__contains__, markers))
|
||||||
|
|
||||||
|
|
||||||
|
# Read the source workbook.
input_file = "4_2/全国统计系统会议接送表(1)(1).xls"
workbook = xlrd.open_workbook(input_file)
sheet = workbook.sheet_by_index(0)

# One output workbook per arrival type.
airplane_workbook = xlwt.Workbook()
train_workbook = xlwt.Workbook()

# One worksheet in each output workbook.
airplane_sheet = airplane_workbook.add_sheet("飞机到达表")
train_sheet = train_workbook.add_sheet("火车到达表")

# Each entry is a (time_obj, row_data) pair; time_obj is None when no
# valid time could be parsed from column F.
airplane_data = []
train_data = []

header_row_count = 3  # the source sheet has 3 header rows

# Copy the header rows into both output sheets.
for row in range(header_row_count):
    for col in range(sheet.ncols):
        header_value = sheet.cell_value(row, col)
        airplane_sheet.write(row, col, header_value)
        train_sheet.write(row, col, header_value)

# Classify every data row by the content of column F.
for row in range(header_row_count, sheet.nrows):
    row_data = [sheet.cell_value(row, col) for col in range(sheet.ncols)]
    cell_value = sheet.cell_value(row, 5)  # column F
    extracted_time = extract_time(cell_value)

    # Parse the text into a time object so rows can be ordered later.
    time_obj = None
    if extracted_time:
        try:
            time_obj = datetime.strptime(extracted_time, "%H:%M").time()
        except ValueError:
            # Out-of-range digits (e.g. 25:70): treat the row as untimed.
            pass

    # The extracted time text becomes an extra trailing column.
    # NOTE(review): this extra column gets no header title here -- confirm
    # whether one is wanted.
    row_data.append(extracted_time)

    # The airport check runs first; anything else goes to the train list.
    if is_airport(cell_value):
        airplane_data.append((time_obj, row_data))
    elif is_train_station(cell_value):
        train_data.append((time_obj, row_data))
    else:
        # Unrecognized arrival type: defaults to the train table. Change
        # this branch if such rows should go elsewhere.
        train_data.append((time_obj, row_data))
|
||||||
|
|
||||||
|
|
||||||
|
# 排序函数
|
||||||
|
def sort_key(item):
    """Build the sort key for a ``(time_obj, row_data)`` entry.

    Entries are ordered by ascending time, and entries whose ``time_obj``
    is None are placed after all timed entries.

    Args:
        item: A ``(time_obj, row_data)`` pair; ``time_obj`` is a
            ``datetime.time`` or None.

    Returns:
        A ``(is_missing, time)`` tuple. False sorts before True, so the
        first element must be "time is missing" for untimed rows to end up
        last. The second element falls back to midnight only so that the
        tuple stays comparable when the time is missing.
    """
    time_obj, _row_data = item
    if time_obj is None:
        return (True, datetime.min.time())
    return (False, time_obj)
|
||||||
|
|
||||||
|
|
||||||
|
# Order each group with the shared sort key.
for group in (airplane_data, train_data):
    group.sort(key=sort_key)

# Fill the airplane sheet, starting right below the header rows.
for row_offset, (_, cells) in enumerate(airplane_data):
    for col, value in enumerate(cells):
        airplane_sheet.write(header_row_count + row_offset, col, value)

# Fill the train sheet the same way.
for row_offset, (_, cells) in enumerate(train_data):
    for col, value in enumerate(cells):
        train_sheet.write(header_row_count + row_offset, col, value)

# Save both workbooks, then report where they went.
airplane_file = "飞机到达表.xls"
train_file = "火车到达表.xls"

airplane_workbook.save(airplane_file)
train_workbook.save(train_file)

print(f"处理完成,飞机到达表已保存至: {airplane_file}")
print(f"处理完成,火车到达表已保存至: {train_file}")
|
Loading…
Reference in New Issue
Block a user