Update helper/utils.py
helper/utils.py  CHANGED  (+59 -0)
@@ -1,6 +1,8 @@
 import os
 from datetime import datetime
+import json
 from typing import Any, Dict, List, Tuple, Union
+import requests
 
 import numpy as np
 import pandas as pd
@@ -205,6 +207,63 @@ def call_llama(prompt: str) -> str:
     return response.choices[0].message.content
 
 
+def call_llama2(prompt: str, max_new_tokens: int = 50, temperature: float = 0.9) -> str:
+    """
+    Calls the Llama API to generate text from a given prompt, controlling output length and randomness.
+
+    Args:
+        prompt (str): The prompt text to send to the Llama model for text generation.
+        max_new_tokens (int, optional): The maximum number of tokens the model should generate. Defaults to 50.
+        temperature (float, optional): Controls the randomness of the output; lower values make the model
+            more deterministic, higher values more random. Defaults to 0.9.
+
+    Returns:
+        str: The generated text response from the Llama model.
+
+    Raises:
+        Exception: If the API call fails with a non-200 status code.
+    """
+    # API endpoint for the Llama model
+    api_url = "https://v6rkdcyir7.execute-api.us-east-1.amazonaws.com/beta"
+
+    # Request body: the prompt is wrapped in Llama-2's [INST] instruction tags
+    json_body = {
+        "body": {
+            "inputs": f"<s>[INST] {prompt} [/INST]",
+            "parameters": {
+                "max_new_tokens": max_new_tokens,
+                "top_p": 0.9,  # Fixed nucleus-sampling cutoff: sample only from tokens in the top 90% cumulative probability
+                "temperature": temperature
+            }
+        }
+    }
+
+    # Headers to indicate that the payload is JSON
+    headers = {"Content-Type": "application/json"}
+
+    # Perform the POST request to the Llama API
+    response = requests.post(api_url, headers=headers, json=json_body)
+
+    # Check the status code before touching the body; a failed call has nothing to parse
+    if response.status_code != 200:
+        raise Exception(f"Error calling Llama API: {response.status_code}")
+
+    # Parse the JSON response; the 'body' field is itself a JSON-encoded string
+    response_body = response.json()['body']
+
+    # Convert the string response to a JSON object (a list of generations)
+    body_list = json.loads(response_body)
+
+    # Extract the 'generated_text' from the first item in the list
+    generated_text = body_list[0]['generated_text']
+
+    # The model echoes the instruction, so keep only the answer after the [/INST] tag
+    answer = generated_text.split("[/INST]")[-1].strip()
+
+    # Return the text generated by the model
+    return answer
+
+
267 |
def quantize_to_kbit(arr: Union[np.ndarray, Any], k: int = 16) -> np.ndarray:
|
268 |
"""Converts an array to a k-bit representation by normalizing and scaling its values.
|
269 |
|