fruitpicker01 committed on
Commit
08e13cd
1 Parent(s): 8d8d43b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -14
app.py CHANGED
@@ -21,6 +21,8 @@ from utils import best_text_choice
21
 
# Tokenizer and encoder loaded from the same RoSBERTa checkpoint; rank_messages
# passes both to best_text_choice.
tokenizer = AutoTokenizer.from_pretrained("ai-forever/ru-en-RoSBERTa")
model = AutoModel.from_pretrained("ai-forever/ru-en-RoSBERTa")

# Credentials are read from the environment; each is None when the variable is unset.
MISTRAL_API_KEY = os.environ.get('MISTRAL_API_KEY')
token = os.environ.get('GITHUB_TOKEN')
@@ -789,29 +791,60 @@ def generate_all_messages(desc, benefits, key_message, gender, generation, psych
789
  save_statistics_to_github(approach_stats)
790
 
def rank_messages(non_personalized_messages, personalized_messages):
    """Rank two groups of generated messages and return them as display strings.

    Each argument is a single string in which individual messages are
    separated by a blank line. Every group is split into messages, ranked
    with ``best_text_choice`` and joined back with blank lines in the ranked
    order.

    Args:
        non_personalized_messages: Blank-line separated non-personalized
            messages. ``None`` or an empty/whitespace-only string is treated
            as "no messages".
        personalized_messages: Blank-line separated personalized messages,
            with the same handling of ``None`` and empty input.

    Returns:
        A ``(ranked_non_personalized, ranked_personalized)`` tuple of strings.
        A group without messages yields ``''``.
    """

    def split_messages(raw_text):
        # ``or ''`` lets a missing value (None) behave like an empty string
        # instead of raising AttributeError on ``.strip()``.
        chunks = (raw_text or '').strip().split('\n\n')
        return [chunk.strip() for chunk in chunks if chunk.strip()]

    non_personalized_list = split_messages(non_personalized_messages)
    personalized_list = split_messages(personalized_messages)

    # Nothing to rank: skip the parquet read and the model call entirely.
    if not non_personalized_list and not personalized_list:
        return '', ''

    # Reference texts used by best_text_choice.
    unique_sms_df = pd.read_parquet('unique_texts.parquet')

    def rank(messages):
        if not messages:
            # Avoid handing an empty batch to the ranker.
            return ''
        # best_text_choice is assumed to return a DataFrame whose 'text'
        # column holds the messages in ranked order -- confirm in utils.
        ranked_df = best_text_choice(messages, unique_sms_df, tokenizer, model)
        return '\n\n'.join(ranked_df['text'].tolist())

    # Tuple elements are evaluated left to right, so the non-personalized
    # group is still ranked first.
    return rank(non_personalized_list), rank(personalized_list)
814
 
 
815
  # ФУНКЦИИ ПРОВЕРОК (НАЧАЛО)
816
 
817
  # 1. Запрещенные слова
 
21
 
# Tokenizer and encoder loaded from the same RoSBERTa checkpoint; rank_messages
# passes both to best_text_choice.
tokenizer = AutoTokenizer.from_pretrained("ai-forever/ru-en-RoSBERTa")
model = AutoModel.from_pretrained("ai-forever/ru-en-RoSBERTa")

# Reference texts handed to best_text_choice; read once at module level so
# rank_messages does not reload the file on every call.
unique_sms_df = pd.read_parquet('unique_texts.parquet')

# Credentials are read from the environment; each is None when the variable is unset.
MISTRAL_API_KEY = os.environ.get('MISTRAL_API_KEY')
token = os.environ.get('GITHUB_TOKEN')
 
791
  save_statistics_to_github(approach_stats)
792
 
def rank_messages(non_personalized_messages, personalized_messages):
    """Rank two groups of generated messages and rebuild their display text.

    Each argument is a single string in which individual messages are
    separated by a blank line. A message may carry trailing metadata after a
    ``------`` marker; that metadata is dropped before ranking. Each group is
    ranked with ``best_text_choice`` and every ranked message is emitted
    again with a freshly computed character count after the marker.

    Args:
        non_personalized_messages: Blank-line separated non-personalized
            messages. ``None`` or an empty/whitespace-only string is treated
            as "no messages".
        personalized_messages: Blank-line separated personalized messages,
            with the same handling of ``None`` and empty input.

    Returns:
        A ``(ranked_non_personalized, ranked_personalized)`` tuple of strings,
        each holding the ranked messages joined by blank lines. A group
        without messages yields ``''``.
    """

    def clean_message(msg):
        # Keep only the text before the first '------' marker.
        return msg.partition('------')[0].strip()

    def split_and_clean(raw_text):
        # ``or ''`` lets a missing value (None) behave like an empty string.
        chunks = (raw_text or '').strip().split('\n\n')
        cleaned = (clean_message(chunk) for chunk in chunks)
        # Filter AFTER cleaning: a chunk consisting only of metadata becomes
        # empty and must not be ranked or shown as a zero-length message.
        return [msg for msg in cleaned if msg]

    def rank_and_format(messages):
        if not messages:
            # Avoid handing an empty batch to the ranker.
            return ''
        # best_text_choice is assumed to return a DataFrame whose 'text'
        # column holds the messages in ranked order -- confirm in utils.
        ranked_df = best_text_choice(messages, unique_sms_df, tokenizer, model)
        # The length is recomputed from the cleaned text rather than reused
        # from the incoming metadata.
        return '\n\n'.join(
            f"{msg}\n------\nКоличество знаков: {len(msg)}"
            for msg in ranked_df['text'].tolist()
        )

    non_personalized_list = split_and_clean(non_personalized_messages)
    personalized_list = split_and_clean(personalized_messages)

    # Tuple elements are evaluated left to right, so the non-personalized
    # group is still ranked first.
    return rank_and_format(non_personalized_list), rank_and_format(personalized_list)
846
 
847
+
848
  # ФУНКЦИИ ПРОВЕРОК (НАЧАЛО)
849
 
850
  # 1. Запрещенные слова