Spaces:
Running
Running
new feature added
Browse files- app.py +4 -1
- utils/helper_functions.py +76 -0
app.py
CHANGED
@@ -157,7 +157,10 @@ if prompt := st.chat_input(initial_input):
|
|
157 |
for i in range(final_ref.shape[0]):
|
158 |
this_content = final_ref["answers"][i]
|
159 |
if len(this_content) > 3:
|
160 |
-
|
|
|
|
|
|
|
161 |
else:
|
162 |
this_score = 0
|
163 |
independent_ai_judge_score.append(this_score)
|
|
|
157 |
# Score each candidate answer in final_ref against the user's question.
# The question embedding is loop-invariant, so it is computed at most once
# (lazily) instead of making an embedding API call on every iteration.
question_embedding = None
for i in range(final_ref.shape[0]):
    this_content = final_ref["answers"][i]
    if len(this_content) > 3:  # skip trivially short answers
        if question_embedding is None:
            question_embedding = openai_text_embedding(question)
        answer_embedding = openai_text_embedding(this_content)
        this_score = quantized_influence(question_embedding, answer_embedding)
    else:
        this_score = 0  # too short to judge meaningfully
    independent_ai_judge_score.append(this_score)
|
utils/helper_functions.py
CHANGED
@@ -18,6 +18,21 @@ from scipy.spatial.distance import cosine
|
|
18 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
19 |
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def call_chatgpt(prompt: str) -> str:
|
22 |
"""
|
23 |
Uses the OpenAI API to generate an AI response to a prompt.
|
@@ -69,6 +84,46 @@ def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
|
|
69 |
return similarity_score
|
70 |
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
def query(payload: Dict[str, Any]) -> Dict[str, Any]:
|
73 |
"""
|
74 |
Sends a JSON payload to a predefined API URL and returns the JSON response.
|
@@ -123,3 +178,24 @@ def llama2_7b_ysa(prompt: str) -> str:
|
|
123 |
response: str = output[0]["generated_text"]
|
124 |
|
125 |
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
19 |
|
20 |
|
21 |
+
def merge_dataframes(dataframes: List[pd.DataFrame]) -> pd.DataFrame:
    """Concatenate a list of DataFrames, keeping only the QA columns.

    Args:
        dataframes: DataFrames, each expected to contain at least the
            columns "context", "questions", and "answers".

    Returns:
        One DataFrame with a fresh integer index, restricted to the columns
        "context", "questions", "answers". An empty input list yields an
        empty DataFrame with those columns (pd.concat would raise
        ValueError on an empty list).
    """
    columns = ["context", "questions", "answers"]
    if not dataframes:
        # Guard: pd.concat([]) raises "No objects to concatenate".
        return pd.DataFrame(columns=columns)
    combined_dataframe = pd.concat(dataframes, ignore_index=True)
    # Keep only the three QA columns, dropping anything else the inputs had.
    return combined_dataframe[columns]
|
34 |
+
|
35 |
+
|
36 |
def call_chatgpt(prompt: str) -> str:
|
37 |
"""
|
38 |
Uses the OpenAI API to generate an AI response to a prompt.
|
|
|
84 |
return similarity_score
|
85 |
|
86 |
|
87 |
+
def add_dist_score_column(
    dataframe: pd.DataFrame,
    sentence: str,
    top_k: int = 5,
) -> pd.DataFrame:
    """Rank rows by OpenAI STS similarity between their question and *sentence*.

    Adds an "stsopenai" column holding
    ``calculate_sts_openai_score(question, sentence)`` for each row, then
    returns the best-matching rows, highest score first.

    Args:
        dataframe: Must contain a "questions" column. NOTE: this frame is
            mutated in place (the "stsopenai" column is added to it),
            matching the original behavior callers may rely on.
        sentence: Reference sentence each question is scored against.
        top_k: Number of top rows to return (default 5, the original
            hard-coded value).

    Returns:
        The *top_k* rows of *dataframe* sorted by "stsopenai" descending.
    """
    dataframe["stsopenai"] = dataframe["questions"].apply(
        lambda question: calculate_sts_openai_score(str(question), sentence)
    )
    # Highest similarity first; keep only the best top_k rows.
    sorted_dataframe = dataframe.sort_values(by="stsopenai", ascending=False)
    return sorted_dataframe.iloc[:top_k, :]
|
97 |
+
|
98 |
+
|
99 |
+
def convert_to_list_of_dict(df: pd.DataFrame) -> List[Dict[str, str]]:
    """Turn a QA DataFrame into a chat-style message list.

    Each row of *df* contributes two messages, in order: a "user" message
    carrying the row's "questions" value followed by an "assistant" message
    carrying the row's "answers" value.

    Args:
        df: A pandas DataFrame with "questions" and "answers" columns.

    Returns:
        A flat list of ``{"role": ..., "content": ...}`` dictionaries,
        two per input row, preserving row order.
    """
    messages: List[Dict[str, str]] = []
    for _, record in df.iterrows():
        messages.extend(
            (
                {"role": "user", "content": record["questions"]},
                {"role": "assistant", "content": record["answers"]},
            )
        )
    return messages
|
125 |
+
|
126 |
+
|
127 |
def query(payload: Dict[str, Any]) -> Dict[str, Any]:
|
128 |
"""
|
129 |
Sends a JSON payload to a predefined API URL and returns the JSON response.
|
|
|
178 |
response: str = output[0]["generated_text"]
|
179 |
|
180 |
return response
|
181 |
+
|
182 |
+
|
183 |
+
def quantize_to_4bit(arr: Union[np.ndarray, Any]) -> np.ndarray:
    """Quantize an array to 4-bit integer levels (0-15).

    Values are min-max normalized to [0, 1], scaled to [0, 15], and rounded
    with numpy's round (half-to-even).

    Args:
        arr: Array-like of numeric values; converted to np.ndarray if needed.

    Returns:
        An integer ndarray with values in {0, ..., 15}. A constant input
        (max == min) maps to all zeros instead of dividing by zero, which
        previously produced a NaN array.
    """
    if not isinstance(arr, np.ndarray):  # Ensure input is a numpy array
        arr = np.array(arr)
    arr_min = arr.min()
    arr_max = arr.max()
    span = arr_max - arr_min
    if span == 0:
        # Guard: original formula divided by zero for constant arrays.
        return np.zeros(arr.shape, dtype=int)
    normalized_arr = (arr - arr_min) / span  # Normalize values to [0, 1]
    return np.round(normalized_arr * 15).astype(int)  # Scale to 0-15 and round
|
191 |
+
|
192 |
+
|
193 |
+
def quantized_influence(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """Weighted between-group influence of quantized arr1 on quantized arr2.

    Both inputs are quantized to 4-bit levels. arr2's quantized values are
    grouped by the corresponding arr1 level; each group contributes its
    squared deviation from the global mean, weighted by the squared group
    size. The mean of those terms is normalized by the std of quantized
    arr2.

    Args:
        arr1: Grouping array (e.g. one embedding vector).
        arr2: Value array of the same length (e.g. another embedding).

    Returns:
        The normalized weighted-average influence score.
        NOTE(review): a constant arr2 makes np.std zero and this divides by
        zero — confirm inputs are non-constant embeddings.
    """
    arr1_4bit = quantize_to_4bit(arr1)
    arr2_4bit = quantize_to_4bit(arr2)
    y_bar_global = np.mean(arr2_4bit)
    weighted_local_averages = []
    for val in np.unique(arr1_4bit):
        # Select the group once instead of recomputing the mask twice.
        group = arr2_4bit[arr1_4bit == val]
        weighted_local_averages.append(
            (np.mean(group) - y_bar_global) ** 2 * len(group) ** 2
        )
    return np.mean(weighted_local_averages) / np.std(arr2_4bit)