Spaces:

parkerjj
/

BuckLakeAI

Running

App Files Community

parkerjj committed on Dec 20, 2024

Commit

8ec911f

1 Parent(s): d48ef09

优化 Dockerfile 和 us_stock.py，增加 uvicorn 工作进程数，添加股票最新价格缓存功能，简化获取股票信息逻辑

Browse files

Files changed (4) hide show

Dockerfile +1 -1
blkeras.py +220 -205
preprocess.py +26 -55
us_stock.py +32 -2

Dockerfile CHANGED Viewed

@@ -44,4 +44,4 @@ RUN --mount=type=secret,id=HF_Token,mode=0444,required=true \
 #	git clone $(cat /run/secrets/HF_Token)
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]


44	# git clone $(cat /run/secrets/HF_Token)
45
46
47	+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "3"]

blkeras.py CHANGED Viewed

@@ -21,7 +21,7 @@ import os
 from RequestModel import PredictRequest
 from app import TextRequest
-from us_stock import find_stock_codes_or_names
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # 设置环境变量，指定 Hugging Face 缓存路径
 os.environ["HF_HOME"] = "/tmp/huggingface"
@@ -83,6 +83,14 @@ def generate_fake_accuracy():
     return round(fake_accuracy, 5)
 def predict(text: str, stock_codes: list):
@@ -111,7 +119,7 @@ def predict(text: str, stock_codes: list):
         #print("Dependency Parsing:", dependency_parsing)
         #print("Sentiment Score:", sentiment_score)
-        if affected_stock_codes is None:
             # 从 NER 结果中提取相关的股票代码或公司名称
             affected_stock_codes = find_stock_codes_or_names(ner)
@@ -119,268 +127,275 @@ def predict(text: str, stock_codes: list):
         cache_key = generate_key(lemmatized_entry)
         # 检查缓存中是否已有结果
         if cache_key in prediction_cache:
-            print(f"Cache hit: {cache_key} lemmatized_entry: {lemmatized_entry}" )
             return prediction_cache[cache_key]
         # 调用 get_stock_info 函数
-        previous_stock_history, _, previous_stock_inx_index_history, previous_stock_dj_index_history, previous_stock_ixic_index_history, previous_stock_ndx_index_history, _, _, _, _ = get_stock_info(affected_stock_codes)
-        def ensure_fixed_shape(data, shape, variable_name=""):
-            data = np.array(data)
-            if data.shape != shape:
-                fixed_data = np.full(shape, -1)
-                min_shape = tuple(min(s1, s2) for s1, s2 in zip(data.shape, shape))
-                fixed_data[:min_shape[0], :min_shape[1], :min_shape[2]] = data[:min_shape[0], :min_shape[1], :min_shape[2]]
-                return fixed_data
-            return data
-        previous_stock_history = ensure_fixed_shape(previous_stock_history, (1, 30, 6), "previous_stock_history")
-        previous_stock_inx_index_history = ensure_fixed_shape(previous_stock_inx_index_history, (1, 30, 6), "previous_stock_inx_index_history")
-        previous_stock_dj_index_history = ensure_fixed_shape(previous_stock_dj_index_history, (1, 30, 6), "previous_stock_dj_index_history")
-        previous_stock_ixic_index_history = ensure_fixed_shape(previous_stock_ixic_index_history, (1, 30, 6), "previous_stock_ixic_index_history")
-        previous_stock_ndx_index_history = ensure_fixed_shape(previous_stock_ndx_index_history, (1, 30, 6), "previous_stock_ndx_index_history")
-        # 3. 将特征转换为适合模型输入的形状
-        # 这里假设文本、POS、实体识别等是向量，时间序列特征是 (sequence_length, feature_dim) 的形状
-        # POS 和 NER 特征处理
-        # 只取 POS Tagging 的第二部分（即 POS 标签的字母形式）进行处理
-        pos_results = [process_pos_tags(pos_tag[1])[0]]  # 传入 POS 标签列表
-        ner_results = [process_entities(ner)[0]]         # 假设是单个输入
-        print("POS Results:", pos_results)
-        print("NER Results:", ner_results)
-        # 使用与模型定义一致的 pos_tag_dim 和 entity_dim
-        pos_tag_dim = 1024  # 你需要根据模型定义来确定
-        entity_dim = 1024   # 你需要根据模型定义来确定
-        # 调整 max_length 为与 pos_tag_dim 和 entity_dim 一致的值
-        X_pos_tags = pad_sequences(pos_results, maxlen=pos_tag_dim, padding='post', truncating='post', dtype='float32')
-        X_entities = pad_sequences(ner_results, maxlen=entity_dim, padding='post', truncating='post', dtype='float32')
-        # 确保形状为 (1, 1024)
-        X_pos_tags = X_pos_tags.reshape(1, -1)
-        X_entities = X_entities.reshape(1, -1)
-        # Word2Vec 向量处理
-        lemmatized_words = lemmatized_entry  # 这里是 lemmatized_entry 的结果
-        if not lemmatized_words:
-            raise ValueError("Lemmatized words are empty.")
-        X_word2vec = np.array([get_document_vector(lemmatized_words)], dtype='float32')  # 使用 get_document_vector 将 lemmatized_words 转为向量
-        # 情感得分
-        X_sentiment = np.array([[sentiment_score]], dtype='float32')  # sentiment_score 已经是单值，直接转换为二维数组
-        # 打印输入特征的形状，便于调试
-        # print("X_word2vec shape:", X_word2vec.shape)
-        # print("X_pos_tags shape:", X_pos_tags.shape)
-        # print("X_entities shape:", X_entities.shape)
-        # print("X_sentiment shape:", X_sentiment.shape)
-        # 静态特征
-        X_word2vec = ensure_fixed_shape(X_word2vec, (1, 300), "X_word2vec")
-        X_pos_tags = ensure_fixed_shape(X_pos_tags, (1, 1024), "X_pos_tags")
-        X_entities = ensure_fixed_shape(X_entities, (1, 1024), "X_entities")
-        X_sentiment = ensure_fixed_shape(X_sentiment, (1, 1), "X_sentiment")
-        features = [
-            X_word2vec, X_pos_tags, X_entities, X_sentiment,
-            previous_stock_inx_index_history, previous_stock_dj_index_history,
-            previous_stock_ixic_index_history, previous_stock_ndx_index_history,
-            previous_stock_history
-        ]
-        # 打印特征数组的每个元素的形状，便于调试
-        # for i, feature in enumerate(features):
-        #     print(f"Feature {i} shape: {feature.shape} value: {feature[0]} length: {len(feature[0])}")
-        # for name, feature in enumerate(features):
-        #     print(f"模型输入数据  {name} shape: {feature.shape}")
-        # for layer in model.input:
-        #     print(f"模型所需的输入层 {layer.name},   形状: {layer.shape}")
-        # 使用模型进行预测
-        predictions = model.predict(features)
-        # 生成伪精准度值
-        fake_accuracy = generate_fake_accuracy()
-        # 将 predictions 中的每个数组转换为 Python 列表
-        index_inx_predictions = predictions[0].tolist()
-        index_dj_predictions = predictions[1].tolist()
-        index_ixic_predictions = predictions[2].tolist()
-        index_ndx_predictions = predictions[3].tolist()
-        stock_predictions = predictions[4].tolist()
-        # 打印预测结果，便于调试
-        #print("Index INX Predictions:", index_inx_predictions)
-        #print("Index DJ Predictions:", index_dj_predictions)
-        #print("Index IXIC Predictions:", index_ixic_predictions)
-        #print("Index NDX Predictions:", index_ndx_predictions)
-        #print("Stock Predictions:", stock_predictions)
-        # 获取 index_feature 中最后一天的第一个值
-        last_index_inx_value = previous_stock_inx_index_history[0][-1][0]
-        last_index_dj_value = previous_stock_dj_index_history[0][-1][0]
-        last_index_ixic_value = previous_stock_ixic_index_history[0][-1][0]
-        last_index_ndx_value = previous_stock_ndx_index_history[0][-1][0]
-        last_stock_value = previous_stock_history[0][-1][0]
-        # 针对 1012 模型的修复
-        stock_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), stock_predictions[0], last_stock_value, is_index=False)
-        index_inx_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_inx_predictions[0], last_index_inx_value)
-        index_dj_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_dj_predictions[0], last_index_dj_value)
-        index_ixic_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_ixic_predictions[0], last_index_ixic_value)
-        index_ndx_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_ndx_predictions[0], last_index_ndx_value)
-        #print("Stock Predictions after fix:", stock_predictions)
-        #print("Index INX Predictions after fix:", index_inx_predictions)
-        #print("Index DJ Predictions after fix:", index_dj_predictions)
-        #print("Index IXIC Predictions after fix:", index_ixic_predictions)
-        #print("Index NDX Predictions after fix:", index_ndx_predictions)
-        # 提取 Index Predictions 中每一天的第一个值
-        index_inx_day_1 = index_inx_predictions[0][0]
-        index_inx_day_2 = index_inx_predictions[1][0]
-        index_inx_day_3 = index_inx_predictions[2][0]
-        index_dj_day_1 = index_dj_predictions[0][0]
-        index_dj_day_2 = index_dj_predictions[1][0]
-        index_dj_day_3 = index_dj_predictions[2][0]
-        index_ixic_day_1 = index_ixic_predictions[0][0]
-        index_ixic_day_2 = index_ixic_predictions[1][0]
-        index_ixic_day_3 = index_ixic_predictions[2][0]
-        index_ndx_day_1 = index_ndx_predictions[0][0]
-        index_ndx_day_2 = index_ndx_predictions[1][0]
-        index_ndx_day_3 = index_ndx_predictions[2][0]
-        stock_day_1 = stock_predictions[0][0]
-        stock_day_2 = stock_predictions[1][0]
-        stock_day_3 = stock_predictions[2][0]
-        # 计算 impact_1_day, impact_2_day, impact_3_day
-        impact_inx_1_day = (index_inx_day_1 - last_index_inx_value) / last_index_inx_value if last_index_inx_value != 0 else 0
-        impact_inx_2_day = (index_inx_day_2 - index_inx_day_1) / index_inx_day_1 if index_inx_day_1 != 0 else 0
-        impact_inx_3_day = (index_inx_day_3 - index_inx_day_2) / index_inx_day_2 if index_inx_day_2 != 0 else 0
-        impact_dj_1_day = (index_dj_day_1 - last_index_dj_value) / last_index_dj_value if last_index_dj_value != 0 else 0
-        impact_dj_2_day = (index_dj_day_2 - index_dj_day_1) / index_dj_day_1 if index_dj_day_1 != 0 else 0
-        impact_dj_3_day = (index_dj_day_3 - index_dj_day_2) / index_dj_day_2 if index_dj_day_2 != 0 else 0
-        impact_ixic_1_day = (index_ixic_day_1 - last_index_ixic_value) / last_index_ixic_value if last_index_ixic_value != 0 else 0
-        impact_ixic_2_day = (index_ixic_day_2 - index_ixic_day_1) / index_ixic_day_1 if index_ixic_day_1 != 0 else 0
-        impact_ixic_3_day = (index_ixic_day_3 - index_ixic_day_2) / index_ixic_day_2 if index_ixic_day_2 != 0 else 0
-        impact_ndx_1_day = (index_ndx_day_1 - last_index_ndx_value) / last_index_ndx_value if last_index_ndx_value != 0 else 0
-        impact_ndx_2_day = (index_ndx_day_2 - index_ndx_day_1) / index_ndx_day_1 if index_ndx_day_1 != 0 else 0
-        impact_ndx_3_day = (index_ndx_day_3 - index_ndx_day_2) / index_ndx_day_2 if index_ndx_day_2 != 0 else 0
-        impact_stock_1_day = (stock_day_1 - last_stock_value) / last_stock_value if last_stock_value != 0 else 0
-        impact_stock_2_day = (stock_day_2 - stock_day_1) / stock_day_1 if stock_day_1 != 0 else 0
-        impact_stock_3_day = (stock_day_3 - stock_day_2) / stock_day_2 if stock_day_2 != 0 else 0
-        # 将 impact 值转换为百分比字符串
-        impact_inx_1_day_str = f"{impact_inx_1_day:.2%}"
-        impact_inx_2_day_str = f"{impact_inx_2_day:.2%}"
-        impact_inx_3_day_str = f"{impact_inx_3_day:.2%}"
-        impact_dj_1_day_str = f"{impact_dj_1_day:.2%}"
-        impact_dj_2_day_str = f"{impact_dj_2_day:.2%}"
-        impact_dj_3_day_str = f"{impact_dj_3_day:.2%}"
-        impact_ixic_1_day_str = f"{impact_ixic_1_day:.2%}"
-        impact_ixic_2_day_str = f"{impact_ixic_2_day:.2%}"
-        impact_ixic_3_day_str = f"{impact_ixic_3_day:.2%}"
-        impact_ndx_1_day_str = f"{impact_ndx_1_day:.2%}"
-        impact_ndx_2_day_str = f"{impact_ndx_2_day:.2%}"
-        impact_ndx_3_day_str = f"{impact_ndx_3_day:.2%}"
-        impact_stock_1_day_str = f"{impact_stock_1_day:.2%}"
-        impact_stock_2_day_str = f"{impact_stock_2_day:.2%}"
-        impact_stock_3_day_str = f"{impact_stock_3_day:.2%}"
-        # 如果需要返回原始预测数据进行调试，可以直接将其放到响应中
-        if len(affected_stock_codes) > 5:
-            affected_stock_codes_str = "/".join(affected_stock_codes[:3]) + f" and {len(affected_stock_codes)} other stocks"
-        else:
-            affected_stock_codes_str = "/".join(affected_stock_codes) if affected_stock_codes else "N/A"
-        # 扩展股票预测数据到分钟级别
-        stock_predictions = extend_stock_days_to_mins(stock_predictions)
-        index_inx_predictions = extend_stock_days_to_mins(index_inx_predictions)
-        index_dj_predictions = extend_stock_days_to_mins(index_dj_predictions)
-        index_ixic_predictions = extend_stock_days_to_mins(index_ixic_predictions)
-        index_ndx_predictions = extend_stock_days_to_mins(index_ndx_predictions)
-        # 如果需要返回原始预测数据进行调试，可以直接将其放到响应中
-        result = {
-            "news_title": input_text,
-            "ai_prediction_score": float(X_sentiment[0][0]),  # 假设第一个预测值是 AI 预测得分
-            "impact_inx_1_day": impact_inx_1_day_str,                # 计算并格式化 impact_1_day
-            "impact_inx_2_day": impact_inx_2_day_str,                # 计算并格式化 impact_2_day
-            "impact_inx_3_day": impact_inx_3_day_str,
-            "impact_dj_1_day": impact_dj_1_day_str,                # 计算并格式化 impact_1_day
-            "impact_dj_2_day": impact_dj_2_day_str,                # 计算并格式化 impact_2_day
-            "impact_dj_3_day": impact_dj_3_day_str,
-            "impact_ixic_1_day": impact_ixic_1_day_str,                # 计算并格式化 impact_1_day
-            "impact_ixic_2_day": impact_ixic_2_day_str,                # 计算并格式化 impact_2_day
-            "impact_ixic_3_day": impact_ixic_3_day_str,
-            "impact_ndx_1_day": impact_ndx_1_day_str,                # 计算并格式化 impact_1_day
-            "impact_ndx_2_day": impact_ndx_2_day_str,                # 计算并格式化 impact_2_day
-            "impact_ndx_3_day": impact_ndx_3_day_str,
-            "impact_stock_1_day": impact_stock_1_day_str,                # 计算并格式化 impact_1_day
-            "impact_stock_2_day": impact_stock_2_day_str,                # 计算并格式化 impact_2_day
-            "impact_stock_3_day": impact_stock_3_day_str,
-            "affected_stock_codes": affected_stock_codes_str,  # 动态生成受影响的股票代码
-            "accuracy": float(fake_accuracy),
-            "impact_on_stock": stock_predictions,     # 第一个预测值是股票影响
-            "impact_on_index_inx": index_inx_predictions,     # 第一个预测值是股票影响
-            "impact_on_index_dj": index_dj_predictions,     # 第一个预测值是股票影响
-            "impact_on_index_ixic": index_ixic_predictions,     # 第一个预测值是股票影响
-            "impact_on_index_ndx": index_ndx_predictions,     # 第一个预测值是股票影响
-        }
         # 缓存预测结果
-        prediction_cache[cache_key] = result
         # 如果缓存大小超过最大限制，移除最早的缓存项
         if len(prediction_cache) > CACHE_MAX_SIZE:
             prediction_cache.popitem(last=False)
-        #print(f"predict() result: {result}")
         # 返回预测结果
-        return result
     except Exception as e:
         # 打印完整的错误堆栈信息

 from RequestModel import PredictRequest
 from app import TextRequest
+from us_stock import find_stock_codes_or_names, get_last_minute_stock_price
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # 设置环境变量，指定 Hugging Face 缓存路径
 os.environ["HF_HOME"] = "/tmp/huggingface"
     return round(fake_accuracy, 5)
def ensure_fixed_shape(data, shape, variable_name=""):
    """Return ``data`` as an array of exactly ``shape``, cropping or padding with -1.

    If ``data`` already has the requested shape it is returned unchanged
    (after conversion with ``np.array``). Otherwise a new array of ``shape``
    is created, filled with -1, and the region where ``data`` and ``shape``
    overlap is copied into it.

    Args:
        data: Array-like input (nested lists or an ndarray).
        shape: Target shape; works for any number of dimensions, not only 3.
        variable_name: Label used in the error message to identify the input.

    Returns:
        A numpy array whose ``.shape`` equals ``tuple(shape)``.

    Raises:
        ValueError: If ``data`` has more dimensions than ``shape``.
    """
    data = np.array(data)
    shape = tuple(shape)
    if data.shape == shape:
        return data

    target_rank = len(shape)
    if data.ndim > target_rank:
        raise ValueError(
            f"{variable_name or 'data'} has {data.ndim} dimensions, "
            f"cannot fit into target shape {shape}"
        )
    if data.ndim < target_rank:
        # Lower-rank input (e.g. an empty list) is treated as missing its
        # leading axes so the overlap copy below is well defined.
        data = data.reshape((1,) * (target_rank - data.ndim) + data.shape)

    # Allocate a floating-point buffer: an integer buffer (the default for
    # np.full(shape, -1)) would silently truncate fractional values.
    fill_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
    fixed_data = np.full(shape, -1, dtype=fill_dtype)

    # One slice per axis, so 2-D targets such as (1, 300) work as well as
    # 3-D ones such as (1, 30, 6).
    overlap = tuple(slice(0, min(have, want)) for have, want in zip(data.shape, shape))
    fixed_data[overlap] = data[overlap]
    return fixed_data
 def predict(text: str, stock_codes: list):
         #print("Dependency Parsing:", dependency_parsing)
         #print("Sentiment Score:", sentiment_score)
+        if affected_stock_codes is None or not affected_stock_codes:
             # 从 NER 结果中提取相关的股票代码或公司名称
             affected_stock_codes = find_stock_codes_or_names(ner)
         cache_key = generate_key(lemmatized_entry)
         # 检查缓存中是否已有结果
         if cache_key in prediction_cache:
+            print(f"Cache hit: {cache_key}" )
             return prediction_cache[cache_key]
+        # Final Result
+        final_result_list = []
         # 调用 get_stock_info 函数
+        for stock_code in affected_stock_codes:
+            previous_stock_history, _, previous_stock_inx_index_history, previous_stock_dj_index_history, previous_stock_ixic_index_history, previous_stock_ndx_index_history, _, _, _, _ = get_stock_info(stock_code)
+            previous_stock_history = ensure_fixed_shape(previous_stock_history, (1, 30, 6), "previous_stock_history")
+            previous_stock_inx_index_history = ensure_fixed_shape(previous_stock_inx_index_history, (1, 30, 6), "previous_stock_inx_index_history")
+            previous_stock_dj_index_history = ensure_fixed_shape(previous_stock_dj_index_history, (1, 30, 6), "previous_stock_dj_index_history")
+            previous_stock_ixic_index_history = ensure_fixed_shape(previous_stock_ixic_index_history, (1, 30, 6), "previous_stock_ixic_index_history")
+            previous_stock_ndx_index_history = ensure_fixed_shape(previous_stock_ndx_index_history, (1, 30, 6), "previous_stock_ndx_index_history")
+            # 3. 将特征转换为适合模型输入的形状
+            # 这里假设文本、POS、实体识别等是向量，时间序列特征是 (sequence_length, feature_dim) 的形状
+            # POS 和 NER 特征处理
+            # 只取 POS Tagging 的第二部分（即 POS 标签的字母形式）进行处理
+            pos_results = [process_pos_tags(pos_tag[1])[0]]  # 传入 POS 标签列表
+            ner_results = [process_entities(ner)[0]]         # 假设是单个输入
+            #print("POS Results:", pos_results)
+            #print("NER Results:", ner_results)
+            # 使用与模型定义一致的 pos_tag_dim 和 entity_dim
+            pos_tag_dim = 1024  # 你需要根据模型定义来确定
+            entity_dim = 1024   # 你需要根据模型定义来确定
+            # 调整 max_length 为与 pos_tag_dim 和 entity_dim 一致的值
+            X_pos_tags = pad_sequences(pos_results, maxlen=pos_tag_dim, padding='post', truncating='post', dtype='float32')
+            X_entities = pad_sequences(ner_results, maxlen=entity_dim, padding='post', truncating='post', dtype='float32')
+            # 确保形状为 (1, 1024)
+            X_pos_tags = X_pos_tags.reshape(1, -1)
+            X_entities = X_entities.reshape(1, -1)
+            # Word2Vec 向量处理
+            lemmatized_words = lemmatized_entry  # 这里是 lemmatized_entry 的结果
+            if not lemmatized_words:
+                raise ValueError("Lemmatized words are empty.")
+            X_word2vec = np.array([get_document_vector(lemmatized_words)], dtype='float32')  # 使用 get_document_vector 将 lemmatized_words 转为向量
+            # 情感得分
+            X_sentiment = np.array([[sentiment_score]], dtype='float32')  # sentiment_score 已经是单值，直接转换为二维数组
+            # 打印输入特征的形状，便于调试
+            # print("X_word2vec shape:", X_word2vec.shape)
+            # print("X_pos_tags shape:", X_pos_tags.shape)
+            # print("X_entities shape:", X_entities.shape)
+            # print("X_sentiment shape:", X_sentiment.shape)
+            # 静态特征
+            X_word2vec = ensure_fixed_shape(X_word2vec, (1, 300), "X_word2vec")
+            X_pos_tags = ensure_fixed_shape(X_pos_tags, (1, 1024), "X_pos_tags")
+            X_entities = ensure_fixed_shape(X_entities, (1, 1024), "X_entities")
+            X_sentiment = ensure_fixed_shape(X_sentiment, (1, 1), "X_sentiment")
+            features = [
+                X_word2vec, X_pos_tags, X_entities, X_sentiment,
+                previous_stock_inx_index_history, previous_stock_dj_index_history,
+                previous_stock_ixic_index_history, previous_stock_ndx_index_history,
+                previous_stock_history
+            ]
+            # 打印特征数组的每个元素的形状，便于调试
+            # for i, feature in enumerate(features):
+            #     print(f"Feature {i} shape: {feature.shape} value: {feature[0]} length: {len(feature[0])}")
+            # for name, feature in enumerate(features):
+            #     print(f"模型输入数据  {name} shape: {feature.shape}")
+            # for layer in model.input:
+            #     print(f"模型所需的输入层 {layer.name},   形状: {layer.shape}")
+            # 使用模型进行预测
+            predictions = model.predict(features)
+            # 生成伪精准度值
+            fake_accuracy = generate_fake_accuracy()
+            # 将 predictions 中的每个数组转换为 Python 列表
+            index_inx_predictions = predictions[0].tolist()
+            index_dj_predictions = predictions[1].tolist()
+            index_ixic_predictions = predictions[2].tolist()
+            index_ndx_predictions = predictions[3].tolist()
+            stock_predictions = predictions[4].tolist()
+            # 打印预测结果，便于调试
+            #print("Index INX Predictions:", index_inx_predictions)
+            #print("Index DJ Predictions:", index_dj_predictions)
+            #print("Index IXIC Predictions:", index_ixic_predictions)
+            #print("Index NDX Predictions:", index_ndx_predictions)
+            #print("Stock Predictions:", stock_predictions)
+            # 获取 index_feature 中最后一天的第一个值
+            last_index_inx_value = get_last_minute_stock_price('^GSPC')
+            last_index_dj_value = get_last_minute_stock_price('^DJI')
+            last_index_ixic_value = get_last_minute_stock_price('^IXIC')
+            last_index_ndx_value = get_last_minute_stock_price('^NDX')
+            last_stock_value = get_last_minute_stock_price(stock_code)
+            if last_index_inx_value <= 0:
+                last_index_inx_value = previous_stock_inx_index_history[0][-1][0]
+            if last_index_dj_value <= 0:
+                last_index_dj_value = previous_stock_dj_index_history[0][-1][0]
+            if last_index_ixic_value <= 0:
+                last_index_ixic_value = previous_stock_ixic_index_history[0][-1][0]
+            if last_index_ndx_value <= 0:
+                last_index_ndx_value = previous_stock_ndx_index_history[0][-1][0]
+            if last_stock_value <= 0:
+                last_stock_value = previous_stock_history[0][-1][0]
+            # 针对 1012 模型的修复
+            stock_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), stock_predictions[0], last_stock_value, is_index=False)
+            index_inx_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_inx_predictions[0], last_index_inx_value)
+            index_dj_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_dj_predictions[0], last_index_dj_value)
+            index_ixic_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_ixic_predictions[0], last_index_ixic_value)
+            index_ndx_predictions = stock_fix_for_1118_model(float(X_sentiment[0][0]), index_ndx_predictions[0], last_index_ndx_value)
+            #print("Stock Predictions after fix:", stock_predictions)
+            #print("Index INX Predictions after fix:", index_inx_predictions)
+            #print("Index DJ Predictions after fix:", index_dj_predictions)
+            #print("Index IXIC Predictions after fix:", index_ixic_predictions)
+            #print("Index NDX Predictions after fix:", index_ndx_predictions)
+            # 提取 Index Predictions 中每一天的第一个值
+            index_inx_day_1 = index_inx_predictions[0][0]
+            index_inx_day_2 = index_inx_predictions[1][0]
+            index_inx_day_3 = index_inx_predictions[2][0]
+            index_dj_day_1 = index_dj_predictions[0][0]
+            index_dj_day_2 = index_dj_predictions[1][0]
+            index_dj_day_3 = index_dj_predictions[2][0]
+            index_ixic_day_1 = index_ixic_predictions[0][0]
+            index_ixic_day_2 = index_ixic_predictions[1][0]
+            index_ixic_day_3 = index_ixic_predictions[2][0]
+            index_ndx_day_1 = index_ndx_predictions[0][0]
+            index_ndx_day_2 = index_ndx_predictions[1][0]
+            index_ndx_day_3 = index_ndx_predictions[2][0]
+            stock_day_1 = stock_predictions[0][0]
+            stock_day_2 = stock_predictions[1][0]
+            stock_day_3 = stock_predictions[2][0]
+            # 计算 impact_1_day, impact_2_day, impact_3_day
+            impact_inx_1_day = (index_inx_day_1 - last_index_inx_value) / last_index_inx_value if last_index_inx_value != 0 else 0
+            impact_inx_2_day = (index_inx_day_2 - index_inx_day_1) / index_inx_day_1 if index_inx_day_1 != 0 else 0
+            impact_inx_3_day = (index_inx_day_3 - index_inx_day_2) / index_inx_day_2 if index_inx_day_2 != 0 else 0
+            impact_dj_1_day = (index_dj_day_1 - last_index_dj_value) / last_index_dj_value if last_index_dj_value != 0 else 0
+            impact_dj_2_day = (index_dj_day_2 - index_dj_day_1) / index_dj_day_1 if index_dj_day_1 != 0 else 0
+            impact_dj_3_day = (index_dj_day_3 - index_dj_day_2) / index_dj_day_2 if index_dj_day_2 != 0 else 0
+            impact_ixic_1_day = (index_ixic_day_1 - last_index_ixic_value) / last_index_ixic_value if last_index_ixic_value != 0 else 0
+            impact_ixic_2_day = (index_ixic_day_2 - index_ixic_day_1) / index_ixic_day_1 if index_ixic_day_1 != 0 else 0
+            impact_ixic_3_day = (index_ixic_day_3 - index_ixic_day_2) / index_ixic_day_2 if index_ixic_day_2 != 0 else 0
+            impact_ndx_1_day = (index_ndx_day_1 - last_index_ndx_value) / last_index_ndx_value if last_index_ndx_value != 0 else 0
+            impact_ndx_2_day = (index_ndx_day_2 - index_ndx_day_1) / index_ndx_day_1 if index_ndx_day_1 != 0 else 0
+            impact_ndx_3_day = (index_ndx_day_3 - index_ndx_day_2) / index_ndx_day_2 if index_ndx_day_2 != 0 else 0
+            impact_stock_1_day = (stock_day_1 - last_stock_value) / last_stock_value if last_stock_value != 0 else 0
+            impact_stock_2_day = (stock_day_2 - stock_day_1) / stock_day_1 if stock_day_1 != 0 else 0
+            impact_stock_3_day = (stock_day_3 - stock_day_2) / stock_day_2 if stock_day_2 != 0 else 0
+            # 将 impact 值转换为百分比字符串
+            impact_inx_1_day_str = f"{impact_inx_1_day:.2%}"
+            impact_inx_2_day_str = f"{impact_inx_2_day:.2%}"
+            impact_inx_3_day_str = f"{impact_inx_3_day:.2%}"
+            impact_dj_1_day_str = f"{impact_dj_1_day:.2%}"
+            impact_dj_2_day_str = f"{impact_dj_2_day:.2%}"
+            impact_dj_3_day_str = f"{impact_dj_3_day:.2%}"
+            impact_ixic_1_day_str = f"{impact_ixic_1_day:.2%}"
+            impact_ixic_2_day_str = f"{impact_ixic_2_day:.2%}"
+            impact_ixic_3_day_str = f"{impact_ixic_3_day:.2%}"
+            impact_ndx_1_day_str = f"{impact_ndx_1_day:.2%}"
+            impact_ndx_2_day_str = f"{impact_ndx_2_day:.2%}"
+            impact_ndx_3_day_str = f"{impact_ndx_3_day:.2%}"
+            impact_stock_1_day_str = f"{impact_stock_1_day:.2%}"
+            impact_stock_2_day_str = f"{impact_stock_2_day:.2%}"
+            impact_stock_3_day_str = f"{impact_stock_3_day:.2%}"
+            # 扩展股票预测数据到分钟级别
+            stock_predictions = extend_stock_days_to_mins(stock_predictions)
+            index_inx_predictions = extend_stock_days_to_mins(index_inx_predictions)
+            index_dj_predictions = extend_stock_days_to_mins(index_dj_predictions)
+            index_ixic_predictions = extend_stock_days_to_mins(index_ixic_predictions)
+            index_ndx_predictions = extend_stock_days_to_mins(index_ndx_predictions)
+            # 如果需要返回原始预测数据进行调试，可以直接将其放到响应中
+            result = {
+                "news_title": input_text,
+                "ai_prediction_score": float(X_sentiment[0][0]),  # 假设第一个预测值是 AI 预测得分
+                "impact_inx_1_day": impact_inx_1_day_str,                # 计算并格式化 impact_1_day
+                "impact_inx_2_day": impact_inx_2_day_str,                # 计算并格式化 impact_2_day
+                "impact_inx_3_day": impact_inx_3_day_str,
+                "impact_dj_1_day": impact_dj_1_day_str,                # 计算并格式化 impact_1_day
+                "impact_dj_2_day": impact_dj_2_day_str,                # 计算并格式化 impact_2_day
+                "impact_dj_3_day": impact_dj_3_day_str,
+                "impact_ixic_1_day": impact_ixic_1_day_str,                # 计算并格式化 impact_1_day
+                "impact_ixic_2_day": impact_ixic_2_day_str,                # 计算并格式化 impact_2_day
+                "impact_ixic_3_day": impact_ixic_3_day_str,
+                "impact_ndx_1_day": impact_ndx_1_day_str,                # 计算并格式化 impact_1_day
+                "impact_ndx_2_day": impact_ndx_2_day_str,                # 计算并格式化 impact_2_day
+                "impact_ndx_3_day": impact_ndx_3_day_str,
+                "impact_stock_1_day": impact_stock_1_day_str,                # 计算并格式化 impact_1_day
+                "impact_stock_2_day": impact_stock_2_day_str,                # 计算并格式化 impact_2_day
+                "impact_stock_3_day": impact_stock_3_day_str,
+                "affected_stock_codes": stock_code,  # 动态生成受影响的股票代码
+                "accuracy": float(fake_accuracy),
+                "impact_on_stock": stock_predictions,     # 第一个预测值是股票影响
+                "impact_on_index_inx": index_inx_predictions,     # 第一个预测值是股票影响
+                "impact_on_index_dj": index_dj_predictions,     # 第一个预测值是股票影响
+                "impact_on_index_ixic": index_ixic_predictions,     # 第一个预测值是股票影响
+                "impact_on_index_ndx": index_ndx_predictions,     # 第一个预测值是股票影响
+            }
+            final_result_list.append(result)
         # 缓存预测结果
+        prediction_cache[cache_key] = final_result_list
         # 如果缓存大小超过最大限制，移除最早的缓存项
         if len(prediction_cache) > CACHE_MAX_SIZE:
             prediction_cache.popitem(last=False)
         # 返回预测结果
+        return final_result_list
     except Exception as e:
         # 打印完整的错误堆栈信息

preprocess.py CHANGED Viewed

@@ -222,9 +222,8 @@ def get_sentiment_score(text):
-def get_stock_info(stock_codes, history_days=30):
     # 获取股票代码和新闻日期
-    stock_codes = stock_codes
     news_date = datetime.now().strftime('%Y%m%d')
     # print(f"Getting stock info for {stock_codes} on {news_date}")
@@ -314,70 +313,42 @@ def get_stock_info(stock_codes, history_days=30):
         return previous_rows, following_rows
-    if not stock_codes or stock_codes == ['']:
-        # 如果 stock_codes 为空，直接获取并返回大盘数据
-        stock_index_ndx_history = get_stock_index_history("", news_date, 1)
-        stock_index_dj_history = get_stock_index_history("", news_date, 2)
-        stock_index_inx_history = get_stock_index_history("", news_date, 3)
-        stock_index_ixic_history = get_stock_index_history("", news_date, 4)
-        previous_ndx_rows, following_ndx_rows = process_history(stock_index_ndx_history, news_date, history_days)
-        previous_dj_rows, following_dj_rows = process_history(stock_index_dj_history, news_date, history_days)
-        previous_inx_rows, following_inx_rows = process_history(stock_index_inx_history, news_date, history_days)
-        previous_ixic_rows, following_ixic_rows = process_history(stock_index_ixic_history, news_date, history_days)
-        previous_stock_inx_index_history.append(previous_inx_rows.values.tolist())
-        previous_stock_dj_index_history.append(previous_dj_rows.values.tolist())
-        previous_stock_ixic_index_history.append(previous_ixic_rows.values.tolist())
-        previous_stock_ndx_index_history.append(previous_ndx_rows.values.tolist())
-        following_stock_inx_index_history.append(following_inx_rows.values.tolist())
-        following_stock_dj_index_history.append(following_dj_rows.values.tolist())
-        following_stock_ixic_index_history.append(following_ixic_rows.values.tolist())
-        following_stock_ndx_index_history.append(following_ndx_rows.values.tolist())
         # 个股补零逻辑
         previous_stock_history.append([[-1] * 6] * history_days)
         following_stock_history.append([[-1] * 6] * 3)
     else:
-        for stock_code in stock_codes:
-            stock_code = stock_code.strip()
-            stock_history = get_stock_history(stock_code, news_date)
-            # 处理个股数据
-            previous_rows, following_rows = process_history(stock_history, news_date)
-            previous_stock_history.append(previous_rows.values.tolist())
-            following_stock_history.append(following_rows.values.tolist())
-            # 处理大盘数据
-            stock_index_ndx_history = get_stock_index_history("", news_date, 1)
-            stock_index_dj_history = get_stock_index_history("", news_date, 2)
-            stock_index_inx_history = get_stock_index_history("", news_date, 3)
-            stock_index_ixic_history = get_stock_index_history("", news_date, 4)
-            previous_ndx_rows, following_ndx_rows = process_history(stock_index_ndx_history, news_date, history_days)
-            previous_dj_rows, following_dj_rows = process_history(stock_index_dj_history, news_date, history_days)
-            previous_inx_rows, following_inx_rows = process_history(stock_index_inx_history, news_date, history_days)
-            previous_ixic_rows, following_ixic_rows = process_history(stock_index_ixic_history, news_date, history_days)
-            previous_stock_inx_index_history.append(previous_inx_rows.values.tolist())
-            previous_stock_dj_index_history.append(previous_dj_rows.values.tolist())
-            previous_stock_ixic_index_history.append(previous_ixic_rows.values.tolist())
-            previous_stock_ndx_index_history.append(previous_ndx_rows.values.tolist())
-            following_stock_inx_index_history.append(following_inx_rows.values.tolist())
-            following_stock_dj_index_history.append(following_dj_rows.values.tolist())
-            following_stock_ixic_index_history.append(following_ixic_rows.values.tolist())
-            following_stock_ndx_index_history.append(following_ndx_rows.values.tolist())
-            # 只返回第一支股票的数据
-            break
     return  previous_stock_history, following_stock_history, \
             previous_stock_inx_index_history, previous_stock_dj_index_history, previous_stock_ixic_index_history, previous_stock_ndx_index_history, \

+def get_stock_info(stock_code: str, history_days=30):
     # 获取股票代码和新闻日期
     news_date = datetime.now().strftime('%Y%m%d')
     # print(f"Getting stock info for {stock_codes} on {news_date}")
         return previous_rows, following_rows
+    stock_index_ndx_history = get_stock_index_history("", news_date, 1)
+    stock_index_dj_history = get_stock_index_history("", news_date, 2)
+    stock_index_inx_history = get_stock_index_history("", news_date, 3)
+    stock_index_ixic_history = get_stock_index_history("", news_date, 4)
+    previous_ndx_rows, following_ndx_rows = process_history(stock_index_ndx_history, news_date, history_days)
+    previous_dj_rows, following_dj_rows = process_history(stock_index_dj_history, news_date, history_days)
+    previous_inx_rows, following_inx_rows = process_history(stock_index_inx_history, news_date, history_days)
+    previous_ixic_rows, following_ixic_rows = process_history(stock_index_ixic_history, news_date, history_days)
+    previous_stock_inx_index_history.append(previous_inx_rows.values.tolist())
+    previous_stock_dj_index_history.append(previous_dj_rows.values.tolist())
+    previous_stock_ixic_index_history.append(previous_ixic_rows.values.tolist())
+    previous_stock_ndx_index_history.append(previous_ndx_rows.values.tolist())
+    following_stock_inx_index_history.append(following_inx_rows.values.tolist())
+    following_stock_dj_index_history.append(following_dj_rows.values.tolist())
+    following_stock_ixic_index_history.append(following_ixic_rows.values.tolist())
+    following_stock_ndx_index_history.append(following_ndx_rows.values.tolist())
+    if not stock_code or stock_code == '' or stock_code == 'NONE_SYMBOL_FOUND':
         # 个股补零逻辑
         previous_stock_history.append([[-1] * 6] * history_days)
         following_stock_history.append([[-1] * 6] * 3)
     else:
+        stock_code = stock_code.strip()
+        stock_history = get_stock_history(stock_code, news_date)
+        # 处理个股数据
+        previous_rows, following_rows = process_history(stock_history, news_date)
+        previous_stock_history.append(previous_rows.values.tolist())
+        following_stock_history.append(following_rows.values.tolist())
     return  previous_stock_history, following_stock_history, \
             previous_stock_inx_index_history, previous_stock_dj_index_history, previous_stock_ixic_index_history, previous_stock_ndx_index_history, \

us_stock.py CHANGED Viewed

@@ -11,6 +11,8 @@ import requests
 import threading
 import asyncio
 logging.basicConfig(level=logging.INFO)
@@ -150,6 +152,32 @@ def reduce_columns(df, columns_to_keep):
     return df[columns_to_keep]
 # 返回个股历史数据
 def get_stock_history(symbol, news_date, retries=10):
     # 定义重试间隔时间序列（秒）
@@ -326,7 +354,7 @@ def find_stock_codes_or_names(entities):
         # 检查 Symbol 列
         if entity_upper in all_symbols:
             stock_codes.add(entity_upper)
-            print(f"Matched symbol: {entity_upper}")
         # 检查 Name 列，确保完整匹配而不是部分匹配
         for name, symbol in name_to_symbol.items():
@@ -336,7 +364,9 @@ def find_stock_codes_or_names(entities):
                 stock_codes.add(symbol.upper())
                 #print(f"Matched name/company: '{entity_lower}' in '{name}' -> {symbol.upper()}")
-    print(f"Stock codes found: {stock_codes}")
     return list(stock_codes)

 import threading
 import asyncio
+import yfinance
 logging.basicConfig(level=logging.INFO)
     return df[columns_to_keep]
+# Module-level price cache: symbol -> (latest_price, time_the_price_was_fetched).
+_price_cache = {}
+def get_last_minute_stock_price(symbol: str) -> float:
+    """Return the most recent close price for ``symbol``, cached for 30 minutes.
+
+    A cached price younger than 30 minutes is returned without any network
+    access. Otherwise today's 5-minute bars are downloaded through yfinance
+    and the last non-NaN ``Close`` value is cached and returned.
+
+    Args:
+        symbol: Ticker symbol passed straight to ``yfinance.download``.
+
+    Returns:
+        The latest close price as a float, or ``-1.0`` when no usable price
+        is available (empty download, or every close is NaN). The ``-1.0``
+        sentinel is never cached, so the next call retries the download.
+    """
+    current_time = datetime.now()
+    # Serve from the cache while the entry is still fresh.
+    cached_entry = _price_cache.get(symbol)
+    if cached_entry is not None:
+        cached_price, cached_time = cached_entry
+        if current_time - cached_time < timedelta(minutes=30):
+            return cached_price
+    # Cache miss or stale entry: fetch today's 5-minute bars.
+    stock_data = yfinance.download(symbol, period='1d', interval='5m')
+    if stock_data.empty:
+        return -1.0
+    # Depending on the yfinance version, stock_data['Close'] may be a Series
+    # or a one-column DataFrame (ticker-level columns). Flattening to a 1-D
+    # array handles both, and dropna() keeps a NaN bar from being cached as
+    # if it were a real price.
+    close_values = stock_data['Close'].dropna().to_numpy().ravel()
+    if close_values.size == 0:
+        return -1.0
+    latest_price = float(close_values[-1])
+    # Remember the fresh price together with the time it was requested.
+    _price_cache[symbol] = (latest_price, current_time)
+    return latest_price
 # 返回个股历史数据
 def get_stock_history(symbol, news_date, retries=10):
     # 定义重试间隔时间序列（秒）
         # 检查 Symbol 列
         if entity_upper in all_symbols:
             stock_codes.add(entity_upper)
+            #print(f"Matched symbol: {entity_upper}")
         # 检查 Name 列，确保完整匹配而不是部分匹配
         for name, symbol in name_to_symbol.items():
                 stock_codes.add(symbol.upper())
                 #print(f"Matched name/company: '{entity_lower}' in '{name}' -> {symbol.upper()}")
+    #print(f"Stock codes found: {stock_codes}")
+    if not stock_codes:
+        return ['NONE_SYMBOL_FOUND']
     return list(stock_codes)