Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -108,7 +108,7 @@ def respond(message, history):
|
|
108 |
llm = HuggingFaceInferenceAPI(
|
109 |
model_name=selected_llm_model_name,
|
110 |
contextWindow=8192, # Context window size (typically max length of the model)
|
111 |
-
maxTokens=
|
112 |
temperature=0.3, # Lower temperature for more focused answers (0.2-0.4 for factual info)
|
113 |
topP=0.9, # Top-p sampling to control diversity while retaining quality
|
114 |
frequencyPenalty=0.5, # Slight penalty to avoid repetition
|
@@ -120,8 +120,8 @@ def respond(message, history):
|
|
120 |
query_engine = vector_index.as_query_engine(llm=llm)
|
121 |
bot_message = query_engine.query(message)
|
122 |
|
123 |
-
print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
|
124 |
-
return f"{selected_llm_model_name}:\n{str(bot_message)}"
|
125 |
except Exception as e:
|
126 |
if str(e) == "'NoneType' object has no attribute 'as_query_engine'":
|
127 |
return "Please upload a file."
|
|
|
108 |
llm = HuggingFaceInferenceAPI(
|
109 |
model_name=selected_llm_model_name,
|
110 |
contextWindow=8192, # Context window size (typically max length of the model)
|
111 |
+
maxTokens=2048, # Upper bound on tokens generated per response (raised to 2048; 512-1024 is usually enough for detailed answers)
|
112 |
temperature=0.3, # Lower temperature for more focused answers (0.2-0.4 for factual info)
|
113 |
topP=0.9, # Top-p sampling to control diversity while retaining quality
|
114 |
frequencyPenalty=0.5, # Slight penalty to avoid repetition
|
|
|
120 |
query_engine = vector_index.as_query_engine(llm=llm)
|
121 |
bot_message = query_engine.query(message)
|
122 |
|
123 |
+
print(f"\n{datetime.now()}:{selected_llm_model_name} :: {message} --> {str(bot_message)}\n")
|
124 |
+
return f"{selected_llm_model_name}:\n\n{str(bot_message)}"
|
125 |
except Exception as e:
|
126 |
if str(e) == "'NoneType' object has no attribute 'as_query_engine'":
|
127 |
return "Please upload a file."
|