Spaces:
Sleeping
Sleeping
import os | |
import string | |
from typing import Any, Dict, List, Tuple, Union | |
import chromadb | |
import numpy as np | |
import openai | |
import pandas as pd | |
import requests | |
import streamlit as st | |
from datasets import load_dataset | |
from langchain.document_loaders import TextLoader | |
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.vectorstores import Chroma | |
from scipy.spatial.distance import cosine | |
openai.api_key = os.environ["OPENAI_API_KEY"] | |
def call_chatgpt(prompt: str) -> str:
    """
    Ask the OpenAI completions endpoint for a reply to ``prompt``.

    Args:
        prompt: Text sent to the model as the completion prompt.

    Returns:
        The ``text`` field of the first choice in the API response.
    """
    # Sampling settings forwarded verbatim to the completion call.
    completion_kwargs = dict(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0.5,
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    completion = openai.Completion.create(**completion_kwargs)
    # Only the first choice of the response is read.
    first_choice = completion.choices[0]
    return first_choice["text"]
def openai_text_embedding(prompt: str) -> List[float]:
    """
    Embed ``prompt`` with OpenAI's ``text-embedding-ada-002`` model.

    Args:
        prompt: Text to embed.

    Returns:
        The ``embedding`` entry of the first item in the response's ``data``
        list. The OpenAI API documents this as a list of floats (it is not a
        string, despite the earlier ``-> str`` annotation).
    """
    response = openai.Embedding.create(input=prompt, model="text-embedding-ada-002")
    # A single input yields a single entry under "data".
    return response["data"][0]["embedding"]
def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
    """
    Score how similar two sentences are using OpenAI text embeddings.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        One minus the cosine distance between the two sentence embeddings,
        i.e. their cosine similarity.
    """
    # Embed both sentences, first then second.
    raw_vectors = [openai_text_embedding(text) for text in (sentence1, sentence2)]
    # Convert to NumPy arrays before computing the distance.
    vec_a, vec_b = (np.asarray(vec) for vec in raw_vectors)
    # scipy's `cosine` is a distance, so subtract it from 1 for a similarity.
    return 1 - cosine(vec_a, vec_b)
def query(payload: Dict[str, Any], timeout: float = 120.0) -> Dict[str, Any]:
    """
    Sends a JSON payload to a predefined API URL and returns the JSON response.

    Args:
        payload (Dict[str, Any]): The JSON payload to be sent to the API.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout, ``requests`` waits indefinitely, so a stalled
            endpoint would hang the caller.

    Returns:
        Dict[str, Any]: The JSON response received from the API.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # API endpoint URL
    API_URL = "https://sks7h7h5qkhoxwxo.us-east-1.aws.endpoints.huggingface.cloud"
    # Headers to indicate both the request and response formats are JSON
    headers = {"Accept": "application/json", "Content-Type": "application/json"}
    # Sending a POST request with the JSON payload and headers; the timeout
    # bounds how long we wait on an unresponsive endpoint.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Returning the decoded JSON body (the HTTP status code is not inspected here)
    return response.json()
def llama2_7b_ysa(prompt: str) -> str:
    """
    Generate text for ``prompt`` through the module's ``query`` helper.

    The request payload carries the prompt under ``inputs`` and a
    ``parameters`` dictionary that sets ``max_new_tokens`` to 200. The
    response is expected to be a list whose first item is a dictionary with
    a ``generated_text`` key.

    Parameters:
    - prompt (str): The text prompt to send to the model.

    Returns:
    - str: The ``generated_text`` value of the first item in the response.

    Note:
    - Relies on ``query`` being defined in the same module.
    """
    # Generation settings sent alongside the prompt.
    generation_params: Dict[str, Any] = {"max_new_tokens": 200}
    output = query({"inputs": prompt, "parameters": generation_params})
    # The generated text lives in the first element of the response list.
    first_item = output[0]
    generated: str = first_item["generated_text"]
    return generated