jhansi1 committed on
Commit
202e889
·
verified ·
1 Parent(s): c1cbeee

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
+
5
# Relative parquet shard path for each dataset split, keyed by split name.
# Every split ships as a single shard that follows the same naming pattern.
splits = {
    split_name: f"data/{split_name}-00000-of-00001.parquet"
    for split_name in ("train", "validation", "test")
}
11
+
12
# Load the dataset
@st.cache_resource
def load_dataset(split="train"):
    """Read one split of the survivorslib-law-books dataset into a DataFrame.

    Args:
        split: Key into the module-level ``splits`` mapping
            ("train", "validation" or "test").

    Returns:
        The DataFrame that ``pd.read_parquet`` produces for the split's shard.

    Raises:
        KeyError: If ``split`` is not a key of ``splits``.
    """
    shard_path = splits[split]
    # The hf:// URL addresses the shard inside the Hugging Face dataset repo.
    parquet_url = f"hf://datasets/BEE-spoke-data/survivorslib-law-books/{shard_path}"
    return pd.read_parquet(parquet_url)
16
+
17
# Initialize the model and tokenizer
@st.cache_resource
def load_model():
    """Build the text-generation pipeline for the Nemotron 70B instruct model.

    Decorated with ``st.cache_resource`` so the tokenizer and weights are
    loaded once instead of on every Streamlit rerun.

    Returns:
        A ``transformers`` "text-generation" pipeline wrapping the model and
        its tokenizer.
    """
    model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # torch_dtype="auto" keeps the precision stored in the checkpoint instead
    # of upcasting every weight to float32, which would double the memory
    # needed for a model of this size.
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
24
+
25
# Streamlit interface
st.title("Legal Text Generator with NVIDIA Llama")
st.write("Generate text based on the Survivorslib Legal Dataset and the NVIDIA Llama model.")

# Load dataset and model pipeline
st.sidebar.title("Options")
split_option = st.sidebar.selectbox("Select dataset split", ["train", "validation", "test"])
dataset = load_dataset(split=split_option)
text_generator = load_model()

# Show sample data from the dataset
st.subheader(f"Sample Data from {split_option.capitalize()} Split")
st.write(dataset.head())  # Displaying the first few rows of the selected dataset split

# Session-state key backing the prompt text area. Streamlit reruns the whole
# script on every interaction, so a value assigned to a plain local variable
# after one button click is gone by the time the next button is clicked.
PROMPT_KEY = "prompt_text"


def _first_sample_text(frame):
    """Return the first 'content' entry of frame, or None if there is no usable text."""
    if "content" not in frame.columns or frame.empty:
        return None
    first_entry = frame["content"].iloc[0]
    if isinstance(first_entry, str) and first_entry:
        return first_entry
    return None


def _fill_prompt(text):
    """Button callback: store text as the value of the prompt text area."""
    st.session_state[PROMPT_KEY] = text


# Prompt input
prompt = st.text_area(
    "Enter your prompt:",
    placeholder="Type a legal prompt or select a sample text...",
    key=PROMPT_KEY,
)

# Optional: Select sample text from the dataset to use as a prompt.
# The callback runs before the rerun triggered by the click, so the text area
# above already holds the sample text when the message below is written, and
# it is still there when "Generate Response" is clicked afterwards.
sample_text = _first_sample_text(dataset)
if sample_text is None:
    if st.button("Use Sample Text"):
        st.write("Dataset does not contain a 'content' column with text data.")
elif st.button("Use Sample Text", on_click=_fill_prompt, args=(sample_text,)):
    st.write(f"Using sample text from dataset: {prompt}")

# Generate text based on the prompt
if st.button("Generate Response"):
    if prompt and prompt.strip():
        with st.spinner("Generating response..."):
            # max_new_tokens bounds only the generated continuation;
            # max_length would also count the prompt tokens, leaving no room
            # for output once the prompt itself reaches the limit.
            outputs = text_generator(
                prompt,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
            )
            generated_text = outputs[0]["generated_text"]
        st.write("**Generated Text:**")
        st.write(generated_text)
    else:
        st.write("Please enter a prompt to generate a response.")