Spaces:

manohar02
/

manohar02-Llama-2-7b-finetune

Sleeping

File size: 1,259 Bytes

58eb302
2d2d40c
 
 
 
58eb302
2d2d40c
2af70a4
9bdeb47
2d2d40c
 
 
 
 
 
 
 
 
 
 
 
 
9bdeb47
2d2d40c
9bdeb47
2d2d40c

import gradio as gr
from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
from transformers import AutoTokenizer
import transformers
import torch

# Hugging Face Hub id of the fine-tuned Llama 2 checkpoint served by this app.
model = "manohar02/Llama-2-7b-finetune"

# Load the tokenizer once and share it with the pipeline, instead of loading a
# throwaway copy whose only purpose is to expose the EOS token id.
tokenizer = AutoTokenizer.from_pretrained(model)

# Text-generation pipeline that backs the LangChain LLM wrapper below.
pipeline = transformers.pipeline(
    "text-generation",  # task
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    # Cap prompt + completion at 4096 tokens, the context window Llama 2 was
    # trained with; a larger cap lets generation run past what the model can
    # attend to reliably.
    # NOTE(review): confirm against this checkpoint's max_position_embeddings.
    max_length=4096,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# NOTE(review): temperature=0 conflicts with do_sample=True above, and it is
# unclear whether model_kwargs reach generation when a ready-made pipeline is
# supplied -- confirm against the installed LangChain version before relying
# on it for deterministic output.
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': 0})

# Summarization prompt. The input text is fenced with triple backquotes so the
# delimiter in the prompt matches the one the instruction sentence names.
template = """
Write a concise summary of the following text delimited by triple backquotes.
```{text}```
SUMMARY:
"""

# {text} is the only placeholder; it is filled with the user's input.
prompt = PromptTemplate(template=template, input_variables=["text"])

# Chain that renders the prompt and sends it to the LLM.
llm_chain = LLMChain(prompt=prompt, llm=llm)

def generate_summary(text: str) -> str:
    """Summarize *text* with the module-level ``llm_chain``.

    Args:
        text: The passage to summarize.

    Returns:
        The chain's output with leading and trailing whitespace removed, or an
        empty string when *text* is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the (expensive) model call rather than asking
    # the model to summarize an empty passage.
    if not text or not text.strip():
        return ""

    summary = llm_chain.run(text)
    # Only surrounding whitespace is trimmed; no further post-processing.
    return summary.strip()

# Web UI: a single text input whose content is passed to generate_summary,
# with the returned summary shown in a single text output.
iface = gr.Interface(
    fn=generate_summary,
    inputs="text",
    outputs="text",
    title="LLaMA2 Summarizer",
)

# Start serving the interface.
iface.launch()