Fix imports
Files changed:
- app.py (+0 -1)
- llama2_response_mail_generator.py (+0 -14)
app.py CHANGED

@@ -2,7 +2,6 @@ import gradio as gr
 import pandas as pd
 from key_info import extract_entities
 from summarization_with_bart import summarize_email_conditional
-from llama2_response_mail_generator import generate_email_response
 from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import torch
 from huggingface_hub import hf_hub_download
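Since the change is +0 -1, the deleted import was the only reference to the generator module in app.py, so the Space no longer pulls in llama_cpp (and the GGML weight download) just by starting up. If the reply-generation feature is wired back in later, one way to keep startup light is to defer the import into the handler that actually needs it. A minimal sketch, assuming a Gradio callback named respond (the handler name and Interface wiring are illustrative assumptions, not part of this commit):

import gradio as gr

def respond(email_text):
    # Defer the heavy import so llama_cpp and the model weights are only
    # loaded the first time this handler actually runs.
    from llama2_response_mail_generator import generate_email_response
    return generate_email_response(email_text)

demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch()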
llama2_response_mail_generator.py CHANGED

@@ -2,20 +2,6 @@ from huggingface_hub import hf_hub_download
 
 from llama_cpp import Llama
 
-model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
-model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin" # The model is in bin format
-
-# Download the model file
-model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
-
-# Initialize the Llama model with appropriate settings for GPU
-lcpp_llm = Llama(
-    model_path=model_path,
-    n_threads=2, # CPU cores to use
-    n_batch=512, # Batch size for processing; adjust as per your VRAM capacity
-    n_gpu_layers=32 # Number of layers to run on GPU, dependent on your GPU's VRAM
-)
-
 def generate_email_response(email_prompt):
     # Check input received by the function
     print("Received prompt:", email_prompt)
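This hunk removes the eager, import-time model download and initialization but keeps generate_email_response, whose remaining body is not shown in full here. If that function still needs a Llama instance, one way to preserve the behavior without paying the cost at import time is a cached lazy initializer. A minimal sketch reusing the settings from the removed lines (_lcpp_llm and _get_llm are hypothetical names, not part of the repo):

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

_lcpp_llm = None  # cached Llama instance, created on first use

def _get_llm():
    # Download and initialize the model only when first requested,
    # instead of at module import time as before this commit.
    global _lcpp_llm
    if _lcpp_llm is None:
        model_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-13B-chat-GGML",
            filename="llama-2-13b-chat.ggmlv3.q5_1.bin",
        )
        _lcpp_llm = Llama(
            model_path=model_path,
            n_threads=2,      # CPU cores to use
            n_batch=512,      # batch size; adjust to available VRAM
            n_gpu_layers=32,  # layers offloaded to the GPU, depends on VRAM
        )
    return _lcpp_llm

generate_email_response could then call _get_llm() on each invocation; only the first call triggers the download and load.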