✨ feat(news_analyze.py): support caching analysis results and extracting the original text of important news
parent eb776fd3cb · commit 601a940882

news_analyze.py: 257 changed lines
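This change threads an optional date_str and a force_analyze flag through analyze_news_in_batches, caches per-day and per-batch analysis as JSON under the temp directory, and asks each batch prompt for an "important news list" that is parsed out and carried into the saved reports. A minimal usage sketch of the new signature (the DataFrame contents and the date are illustrative, not part of this commit):

import pandas as pd
from news_analyze import analyze_news_in_batches

# Hypothetical input frame with the two columns the function reads.
news_df = pd.DataFrame({
    "datetime": ["2024-05-20 09:00", "2024-05-20 10:30"],
    "content": ["央行宣布降准0.5个百分点", "国务院部署支持新能源汽车下乡"],
})

# First run analyzes and writes temp/analysis_cache_20240520.json;
# an identical second run returns that cached dict without API calls.
result = analyze_news_in_batches(news_df, date_str="2024-05-20")

# force_analyze=True bypasses the cache and overwrites it with fresh results.
fresh = analyze_news_in_batches(news_df, date_str="2024-05-20", force_analyze=True)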
@@ -146,14 +146,34 @@ def get_news_by_date(date_str, force_update=False):
     return None


-def analyze_news_in_batches(news_df):
-    """将新闻按批次分析后汇总"""
+def analyze_news_in_batches(news_df, date_str=None, force_analyze=False):
+    """将新闻按批次分析后汇总,支持缓存分析结果"""
     if news_df is None or news_df.empty:
         return None

+    # 确保temp目录存在
+    temp_dir = ensure_temp_dir()
+
     config = load_config()
     openai_config = config['openai_api']

+    # 如果提供了日期,尝试读取缓存的分析结果
+    cache_file = None
+    if date_str:
+        date_file = datetime.datetime.strptime(date_str, '%Y-%m-%d').strftime('%Y%m%d')
+        cache_file = os.path.join(temp_dir, f"analysis_cache_{date_file}.json")
+
+        # 检查是否有缓存,且不强制重新分析
+        if os.path.exists(cache_file) and not force_analyze:
+            try:
+                import json
+                with open(cache_file, 'r', encoding='utf-8') as f:
+                    cached_analysis = json.load(f)
+                print(f"使用缓存的分析结果: {cache_file}")
+                return cached_analysis
+            except Exception as e:
+                print(f"读取缓存文件时出错: {e}")
+
     # 初始化 OpenAI 客户端
     client = OpenAI(
         api_key=openai_config['api_key'],
@@ -164,6 +184,8 @@ def analyze_news_in_batches(news_df):
     batch_size = 200
     total_batches = math.ceil(len(news_df) / batch_size)
     batch_results = []
+    # 保存每批次的重要新闻原文
+    important_news = []

     print(f"将{len(news_df)}条新闻分成{total_batches}批进行分析,每批{batch_size}条")

@@ -171,12 +193,41 @@ def analyze_news_in_batches(news_df):
         start_idx = i * batch_size
         end_idx = min((i + 1) * batch_size, len(news_df))
         batch_df = news_df.iloc[start_idx:end_idx]

         print(f"分析第{i + 1}/{total_batches}批新闻...")

+        # 生成缓存文件名,按批次缓存
+        batch_cache_file = None
+        if date_str:
+            date_file = datetime.datetime.strptime(date_str, '%Y-%m-%d').strftime('%Y%m%d')
+            batch_cache_file = os.path.join(temp_dir, f"batch_analysis_cache_{date_file}_{i + 1}.json")
+
+            # 检查是否有批次缓存,且不强制重新分析
+            if os.path.exists(batch_cache_file) and not force_analyze:
+                try:
+                    import json
+                    with open(batch_cache_file, 'r', encoding='utf-8') as f:
+                        cached_batch = json.load(f)
+                    print(f"使用缓存的批次分析结果: {batch_cache_file}")
+                    batch_results.append(cached_batch)
+
+                    # 添加此批次的重要新闻到列表
+                    if "important_news" in cached_batch:
+                        important_news.extend(cached_batch["important_news"])
+
+                    continue
+                except Exception as e:
+                    print(f"读取批次缓存文件时出错: {e}")
+
         # 合并当前批次新闻内容
         batch_content = ""
+        batch_news = []  # 存储此批次的新闻
+
         for _, row in batch_df.iterrows():
+            news_item = {
+                "datetime": row['datetime'],
+                "content": row['content']
+            }
+            batch_news.append(news_item)
             batch_content += f"{row['datetime']}: {row['content']}\n\n"

         # 对当前批次进行初步分析
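Both cache layers key their file names off the same date string; a short sketch of the paths they produce, assuming ensure_temp_dir() simply creates and returns a local temp folder (that return value is an assumption, not shown in this diff):

import datetime
import os

date_str = "2024-05-20"  # illustrative date
date_file = datetime.datetime.strptime(date_str, '%Y-%m-%d').strftime('%Y%m%d')

temp_dir = "temp"  # assumption: ensure_temp_dir() returns this directory
full_cache = os.path.join(temp_dir, f"analysis_cache_{date_file}.json")           # whole-day result
batch_cache = os.path.join(temp_dir, f"batch_analysis_cache_{date_file}_1.json")  # batch 1 of N

print(full_cache)   # temp/analysis_cache_20240520.json (POSIX-style path)
print(batch_cache)  # temp/batch_analysis_cache_20240520_1.json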
@@ -185,13 +236,25 @@ def analyze_news_in_batches(news_df):

         {batch_content}

-        请总结以下两点:
+        请完成以下两项分析:
         1. 这批新闻中提到的重要政策(特别是中国颁布的法令等),如果没有相关政策可以直接说明
         2. 这些政策可能对哪些股市板块带来影响(利好/利空)

-        请只提取重要信息,简明扼要地回答。
-        """
+        对于重要的政策新闻,请在分析后提供一个"重要新闻列表",包含新闻的原文和发布时间。
+        请只提取重要信息,过滤掉不相关的新闻(如普通的股价波动、ETF变动等)。
+
+        回复格式:
+        ## 政策分析
+        [您的分析内容]
+
+        ## 板块影响
+        [您的影响分析]
+
+        ## 重要新闻列表
+        1. [时间] [完整新闻原文]
+        2. [时间] [完整新闻原文]
+        ...
+        """
         try:
             response = client.chat.completions.create(
                 messages=[
@@ -203,14 +266,43 @@ def analyze_news_in_batches(news_df):
                 model=openai_config['model'],
                 temperature=0.2
             )

             batch_analysis = response.choices[0].message.content

-            batch_results.append({
+            # 提取重要新闻
+            batch_important_news = []
+            if "## 重要新闻列表" in batch_analysis:
+                news_section = batch_analysis.split("## 重要新闻列表", 1)[1].strip()
+                import re
+                # 匹配时间和新闻内容
+                news_matches = re.findall(r'\d+\.\s+\[([^\]]+)\]\s+(.+?)(?=\n\d+\.|$)', news_section, re.DOTALL)
+                for time_str, content in news_matches:
+                    batch_important_news.append({
+                        "datetime": time_str.strip(),
+                        "content": content.strip()
+                    })
+
+            batch_result = {
                 "batch_number": i + 1,
                 "news_count": len(batch_df),
-                "analysis": batch_analysis
-            })
+                "analysis": batch_analysis,
+                "important_news": batch_important_news
+            }
+
+            batch_results.append(batch_result)
+
+            # 添加重要新闻到总列表
+            important_news.extend(batch_important_news)
+
+            # 保存批次缓存
+            if batch_cache_file:
+                try:
+                    import json
+                    with open(batch_cache_file, 'w', encoding='utf-8') as f:
+                        json.dump(batch_result, f, ensure_ascii=False, indent=2)
+                    print(f"已缓存批次分析结果到: {batch_cache_file}")
+                except Exception as e:
+                    print(f"保存批次缓存文件时出错: {e}")
+
         except Exception as e:
             print(f"分析第{i + 1}批新闻时出错: {e}")

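The important-news extraction above depends on the numbered "[time] text" layout the prompt requests; a small self-contained check of that regex against a made-up reply:

import re

sample = """## 政策分析
...

## 重要新闻列表
1. [2024-05-20 09:00] 央行宣布降准0.5个百分点,释放长期资金约1万亿元
2. [2024-05-20 10:30] 国务院部署进一步支持新能源汽车下乡"""

news_section = sample.split("## 重要新闻列表", 1)[1].strip()
matches = re.findall(r'\d+\.\s+\[([^\]]+)\]\s+(.+?)(?=\n\d+\.|$)', news_section, re.DOTALL)

# Expect two (datetime, content) pairs, one per numbered line.
for time_str, content in matches:
    print(time_str.strip(), "->", content.strip())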
@@ -236,6 +328,7 @@ def analyze_news_in_batches(news_df):
     - 提取3-5条最重要的国家层面政策(特别是中国的宏观经济政策)
     - 每条政策用一句话概括核心内容
     - 标注政策发布部门/会议名称
+    - 对每条重要政策,提供对应的原始新闻来源

     2. 【板块影响分析】
     - 对每条重要政策,分析直接影响的相关行业板块
@@ -251,17 +344,17 @@ def analyze_news_in_batches(news_df):

     ### 一、核心政策摘要
     1. [政策部门/会议] 政策核心内容
+       - 来源新闻: [对应的原始新闻]
        - 影响板块:利好:XXX;利空:XXX
        - 影响逻辑:...

     2. [政策部门/会议] 政策核心内容
+       - 来源新闻: [对应的原始新闻]
        - 影响板块:利好:XXX;利空:XXX
        - 影响逻辑:...

     ### 二、综合市场影响
     [整体分析,包含市场情绪预判和关键时间节点提醒]
     """

     try:
         final_response = client.chat.completions.create(
             messages=[
@@ -273,16 +366,29 @@ def analyze_news_in_batches(news_df):
             model=openai_config['model'],
             temperature=0.2
         )

         final_analysis = final_response.choices[0].message.content

-        return {
+        analysis_result = {
             "news_count": len(news_df),
             "batch_count": total_batches,
             "time_range": f"{news_df['datetime'].iloc[0]} 至 {news_df['datetime'].iloc[-1]}" if not news_df.empty else "",
             "batch_results": batch_results,
-            "final_analysis": final_analysis
+            "final_analysis": final_analysis,
+            "important_news": important_news  # 保存所有重要新闻原文
         }
+
+        # 保存完整分析缓存
+        if cache_file:
+            try:
+                import json
+                with open(cache_file, 'w', encoding='utf-8') as f:
+                    json.dump(analysis_result, f, ensure_ascii=False, indent=2)
+                print(f"已缓存完整分析结果到: {cache_file}")
+            except Exception as e:
+                print(f"保存缓存文件时出错: {e}")
+
+        return analysis_result

     except Exception as e:
         print(f"生成最终分析时出错: {e}")
         return None
@@ -308,31 +414,53 @@ def compare_trend_analysis(yesterday_analysis, today_analysis):
         base_url=openai_config['base_url']
     )

+    # 提取今天和昨天的重要新闻
+    yesterday_news = yesterday_analysis.get('important_news', [])
+    today_news = today_analysis.get('important_news', [])
+
+    # 整理新闻字符串
+    yesterday_news_str = ""
+    for i, news in enumerate(yesterday_news):
+        yesterday_news_str += f"{i + 1}. [{news['datetime']}] {news['content']}\n\n"
+
+    today_news_str = ""
+    for i, news in enumerate(today_news):
+        today_news_str += f"{i + 1}. [{news['datetime']}] {news['content']}\n\n"
+
     # 构建对比分析提示
     prompt = f"""
     你是一位资深财经分析师,请对比昨天和今天的政策分析结果,找出从弱势转为强势的板块,分析未来市场趋势:

     ### 昨天的分析:
     {yesterday_analysis['final_analysis']}

+    ### 昨天的重要新闻原文:
+    {yesterday_news_str}
+
     ### 今天的分析:
     {today_analysis['final_analysis']}

+    ### 今天的重要新闻原文:
+    {today_news_str}
+
     请完成以下分析:

     1. 【弱转强板块识别】
     - 对比昨天的弱势板块和今天的强势板块,找出明显从弱势转为强势的板块
     - 分析这些板块转强的原因
     - 评估这种转变的持续性
+    - 关联对应的原始新闻,作为判断依据

     2. 【政策趋势分析】
     - 分析昨天和今天的政策,提取政策方向的变化或延续
     - 判断政策组合对未来市场的影响
     - 预测可能被持续关注和推动的行业方向
+    - 引用相关新闻作为证据

     3. 【投资机会预判】
     - 基于弱转强的板块和政策趋势,预判未来1-2周可能出现的投资机会
     - 提出值得重点关注的细分行业
+    - 提供相关的政策和新闻支持

     请用以下结构化格式输出:

@@ -341,19 +469,20 @@ def compare_trend_analysis(yesterday_analysis, today_analysis):
        - 转变原因:...
        - 相关政策支持:...
        - 持续性评估:...
+       - 关键新闻依据:[引用原始新闻]

     2. [板块名称]
        - 转变原因:...
        - 相关政策支持:...
        - 持续性评估:...
+       - 关键新闻依据:[引用原始新闻]

     ### 二、政策趋势变化
-    [分析政策方向的延续性和变化]
+    [分析政策方向的延续性和变化,引用相关新闻作为证据]

     ### 三、投资机会与建议
-    [提出具体的投资方向和关注重点]
+    [提出具体的投资方向和关注重点,引用相关政策和新闻]
     """

     try:
         response = client.chat.completions.create(
             messages=[
@@ -365,13 +494,14 @@ def compare_trend_analysis(yesterday_analysis, today_analysis):
             model=openai_config['model'],
             temperature=0.3
         )

         trend_analysis = response.choices[0].message.content

         return {
             "yesterday_date": yesterday_analysis.get("time_range", "昨天"),
             "today_date": today_analysis.get("time_range", "今天"),
-            "trend_analysis": trend_analysis
+            "trend_analysis": trend_analysis,
+            "yesterday_news": yesterday_news,
+            "today_news": today_news
         }
     except Exception as e:
         print(f"生成趋势分析时出错: {e}")
@@ -391,7 +521,6 @@ def main():
     # 获取昨天的新闻,只有在不存在时才获取
     yesterday_file_exists = False
     temp_dir = ensure_temp_dir()
-
     for file in os.listdir(temp_dir):
         if file.startswith(f"{yesterday_file}_") and file.endswith(".csv"):
             yesterday_file_exists = True
@@ -407,7 +536,6 @@ def main():
         for file in os.listdir(temp_dir):
             if file.startswith(f"{yesterday_file}_") and file.endswith(".csv"):
                 yesterday_files.append(os.path.join(temp_dir, file))
-
         dfs = []
         for file in yesterday_files:
             try:
@@ -415,65 +543,86 @@ def main():
                 dfs.append(df)
             except Exception as e:
                 print(f"读取文件{file}时出错: {e}")

         if dfs:
             yesterday_news = pd.concat(dfs, ignore_index=True)
             if 'datetime' in yesterday_news.columns and 'content' in yesterday_news.columns:
                 yesterday_news = yesterday_news[['datetime', 'content']]

-    # 分析今天和昨天的新闻
+    # 分析今天和昨天的新闻,使用缓存机制
     today_analysis = None
     yesterday_analysis = None

     if today_news is not None:
         print(f"获取到 {len(today_news)} 条今日新闻,正在分析...")
-        today_analysis = analyze_news_in_batches(today_news)
+        today_analysis = analyze_news_in_batches(today_news, today, force_analyze=False)

         # 保存今天的原始新闻和分析结果
-        today_news.to_csv(f"news_raw_{today_file}.csv", index=False, encoding='utf-8-sig')
+        today_news.to_csv(f"temp/news_raw_{today_file}.csv", index=False, encoding='utf-8-sig')

         if today_analysis:
-            with open(f"news_analysis_{today_file}.txt", "w", encoding="utf-8") as f:
+            with open(f"temp/news_analysis_{today_file}.txt", "w", encoding="utf-8") as f:
                 f.write(
                     f"分析范围: {today_analysis['news_count']} 条新闻,分成 {today_analysis['batch_count']} 批处理\n")
                 f.write(f"时间范围: {today_analysis['time_range']}\n\n")
                 f.write("最终分析结果:\n")
                 f.write("-" * 80 + "\n")
                 f.write(today_analysis['final_analysis'])
-                f.write("\n" + "-" * 80 + "\n")
+                f.write("\n" + "-" * 80 + "\n\n")

-            print(f"今日分析结果已保存到 news_analysis_{today_file}.txt")
+                # 添加重要新闻原文
+                f.write("重要新闻原文:\n")
+                f.write("-" * 80 + "\n")
+                for i, news in enumerate(today_analysis.get('important_news', [])):
+                    f.write(f"{i + 1}. [{news['datetime']}] {news['content']}\n\n")
+                f.write("-" * 80 + "\n")
+
+            print(f"今日分析结果已保存到 temp/news_analysis_{today_file}.txt")
     else:
         print("无法获取今天的新闻数据")

     if yesterday_news is not None:
         print(f"获取到 {len(yesterday_news)} 条昨日新闻,正在分析...")
-        yesterday_analysis = analyze_news_in_batches(yesterday_news)
+        yesterday_analysis = analyze_news_in_batches(yesterday_news, yesterday, force_analyze=False)

         # 保存昨天的原始新闻和分析结果
-        yesterday_news.to_csv(f"news_raw_{yesterday_file}.csv", index=False, encoding='utf-8-sig')
+        yesterday_news.to_csv(f"temp/news_raw_{yesterday_file}.csv", index=False, encoding='utf-8-sig')

         if yesterday_analysis:
-            with open(f"news_analysis_{yesterday_file}.txt", "w", encoding="utf-8") as f:
+            with open(f"temp/news_analysis_{yesterday_file}.txt", "w", encoding="utf-8") as f:
                 f.write(
                     f"分析范围: {yesterday_analysis['news_count']} 条新闻,分成 {yesterday_analysis['batch_count']} 批处理\n")
                 f.write(f"时间范围: {yesterday_analysis['time_range']}\n\n")
                 f.write("最终分析结果:\n")
                 f.write("-" * 80 + "\n")
                 f.write(yesterday_analysis['final_analysis'])
-                f.write("\n" + "-" * 80 + "\n")
+                f.write("\n" + "-" * 80 + "\n\n")

-            print(f"昨日分析结果已保存到 news_analysis_{yesterday_file}.txt")
+                # 添加重要新闻原文
+                f.write("重要新闻原文:\n")
+                f.write("-" * 80 + "\n")
+                for i, news in enumerate(yesterday_analysis.get('important_news', [])):
+                    f.write(f"{i + 1}. [{news['datetime']}] {news['content']}\n\n")
+                f.write("-" * 80 + "\n")
+
+            print(f"昨日分析结果已保存到 temp/news_analysis_{yesterday_file}.txt")
     else:
         # 尝试读取昨天的分析结果文件
         try:
-            with open(f"news_analysis_{yesterday_file}.txt", "r", encoding="utf-8") as f:
-                content = f.read()
-            yesterday_analysis = {
-                "final_analysis": content,
-                "time_range": yesterday
-            }
-            print(f"已从文件中读取昨日分析结果")
+            # 首先尝试读取JSON缓存文件
+            cache_file = os.path.join(temp_dir, f"analysis_cache_{yesterday_file}.json")
+            if os.path.exists(cache_file):
+                import json
+                with open(cache_file, 'r', encoding='utf-8') as f:
+                    yesterday_analysis = json.load(f)
+                print(f"已从缓存中读取昨日分析结果")
+            else:
+                # 如果没有缓存,尝试读取文本文件
+                with open(f"temp/news_analysis_{yesterday_file}.txt", "r", encoding="utf-8") as f:
+                    content = f.read()
+                yesterday_analysis = {
+                    "final_analysis": content.split("-" * 80)[1].strip(),
+                    "time_range": yesterday,
+                    "important_news": []  # 从文本文件中难以结构化提取新闻,置为空列表
+                }
+                print(f"已从文本文件中读取昨日分析结果")
         except Exception as e:
             print(f"无法读取昨日分析结果: {e}")

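One subtlety in the else branch above: the text-file fallback recovers final_analysis by splitting the saved report on the 80-dash rule, which only works because of the exact layout the report writer produces. A toy check of that assumption (the report text here is fabricated):

# Layout mirrors the report written above: header, rule, analysis, rule, news section.
rule = "-" * 80
content = (
    "分析范围: 400 条新闻,分成 2 批处理\n"
    "时间范围: 2024-05-20 09:00 至 15:00\n\n"
    "最终分析结果:\n" + rule + "\n"
    "### 一、核心政策摘要 ...\n" + rule + "\n\n"
    "重要新闻原文:\n" + rule + "\n1. [...] ...\n" + rule + "\n"
)
print(content.split(rule)[1].strip())  # -> ### 一、核心政策摘要 ...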
@@ -481,17 +630,29 @@ def main():
     if today_analysis and yesterday_analysis:
         print("正在进行今日与昨日的趋势对比分析...")
         trend_analysis = compare_trend_analysis(yesterday_analysis, today_analysis)

         if trend_analysis:
             # 保存趋势分析结果
-            with open(f"trend_analysis_{today_file}.txt", "w", encoding="utf-8") as f:
+            with open(f"temp/trend_analysis_{today_file}.txt", "w", encoding="utf-8") as f:
                 f.write(f"对比分析范围: 昨日({yesterday}) 与 今日({today})\n\n")
                 f.write("趋势分析结果:\n")
                 f.write("-" * 80 + "\n")
                 f.write(trend_analysis['trend_analysis'])
-                f.write("\n" + "-" * 80 + "\n")
+                f.write("\n" + "-" * 80 + "\n\n")

-            print(f"趋势分析结果已保存到 trend_analysis_{today_file}.txt")
+                # 添加弱转强的新闻原文供参考
+                f.write("昨日重要新闻原文:\n")
+                f.write("-" * 80 + "\n")
+                for i, news in enumerate(trend_analysis.get('yesterday_news', [])):
+                    f.write(f"{i + 1}. [{news['datetime']}] {news['content']}\n\n")
+                f.write("-" * 80 + "\n\n")
+
+                f.write("今日重要新闻原文:\n")
+                f.write("-" * 80 + "\n")
+                for i, news in enumerate(trend_analysis.get('today_news', [])):
+                    f.write(f"{i + 1}. [{news['datetime']}] {news['content']}\n\n")
+                f.write("-" * 80 + "\n")
+
+            print(f"趋势分析结果已保存到 temp/trend_analysis_{today_file}.txt")

             # 打印趋势分析结果
             print("\n=== 今日与昨日趋势对比分析 ===\n")
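Taken together, the dict that analyze_news_in_batches now returns (and persists as analysis_cache_<YYYYMMDD>.json) carries the raw important-news items alongside the summaries, which is what compare_trend_analysis and the report writers consume. A sketch of its shape with placeholder values, not real output:

analysis_result = {
    "news_count": 400,
    "batch_count": 2,
    "time_range": "2024-05-20 09:00:00 至 2024-05-20 15:00:00",
    "batch_results": [
        {
            "batch_number": 1,
            "news_count": 200,
            "analysis": "## 政策分析 ...",
            "important_news": [
                {"datetime": "2024-05-20 09:00", "content": "央行宣布降准0.5个百分点"},
            ],
        },
    ],
    "final_analysis": "### 一、核心政策摘要 ...",
    "important_news": [
        {"datetime": "2024-05-20 09:00", "content": "央行宣布降准0.5个百分点"},
    ],
}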