Spaces: Runtime error
Update app.py

app.py CHANGED
@@ -1,15 +1,136 @@
 import gradio as gr
-from

-#
-

-
-
-

 # Create a Gradio interface
-demo = gr.Interface(

 # Launch the Gradio app
 demo.launch()

+import os
+import torch
+import tiktoken
+from model import GPT, GPTConfig
+import pickle
+import string
 import gradio as gr
+from contextlib import nullcontext

+# Model and Tokenizer setup
+device = 'cpu'
+dtype = 'bfloat16' if device != 'cpu' and torch.cuda.is_bf16_supported() else 'float16'
+device_type = 'cuda' if 'cuda' in device else 'cpu'
+ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
+ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)

+cl100k_base = tiktoken.get_encoding("cl100k_base")
+enc = tiktoken.Encoding(
+    name="cl100k_im",
+    pat_str=cl100k_base._pat_str,
+    mergeable_ranks=cl100k_base._mergeable_ranks,
+    special_tokens={
+        **cl100k_base._special_tokens,
+        "<EOR>": 100264,
+        "<SPECIAL2>": 100265,
+    }
+)
+
+# Load model from checkpoint
+model_save_path = 'latest_model.pt'
+if os.path.exists(model_save_path):
+    model = torch.load(model_save_path, map_location=device)
+else:
+    raise FileNotFoundError(f"Model file {model_save_path} not found")
+
+model.eval()
+model.to(device)
+model = torch.compile(model)
+
+# Helpers to encode/decode text with the custom tokenizer
+def encode(text):
+    return enc.encode(text, allowed_special={"<EOR>"})
+
+def decode(tokens):
+    return enc.decode(tokens)
+
+# Truncate output to an approximate token limit (whitespace words, not true tokens)
+def truncate_output(text, token_limit):
+    tokens = text.split()
+    if len(tokens) > token_limit:
+        return ' '.join(tokens[:token_limit]) + '...'
+    return text
+
+# Keep generating until the output reaches max_length words or ends a sentence
+def ensure_complete_output(output, context, max_length, temperature, top_k, top_p, repetition_penalty, eor_token_id):
+    while len(output.split()) < max_length and not output.endswith('.'):
+        continuation = model.generate(
+            torch.tensor(encode(output), dtype=torch.long, device=device)[None, ...],
+            max_new_tokens=max_length,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            eor_token_id=eor_token_id
+        )
+        continuation_text = decode(continuation[0].tolist())
+        # generate() returns the prompt plus the new tokens; keep only the new text
+        if continuation_text.startswith(output):
+            continuation_text = continuation_text[len(output):]
+        if eor_token_id in continuation[0].tolist():
+            continuation_text = continuation_text.split("<EOR>")[0]
+            output += continuation_text
+            break
+        else:
+            output += continuation_text
+        if len(output.split()) >= max_length:
+            break
+    return output
+
+# Text generation function for Gradio interface
+def generate_text(prompt, num_samples, max_new_tokens, temperature, top_k, top_p, repetition_penalty, eor_token_id):
+    with torch.no_grad():
+        with ctx:
+            start_ids = encode(prompt)
+            initial_prompt = torch.tensor(start_ids, dtype=torch.long, device=device)[None, ...]
+            outputs = []
+            for _ in range(num_samples):
+                y = model.generate(
+                    initial_prompt,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_k=top_k,
+                    top_p=top_p,
+                    repetition_penalty=repetition_penalty,
+                    eor_token_id=eor_token_id
+                )
+                # Filter out tokens after the end-of-response token or similar markers
+                output_ids = y[0].tolist()
+                if eor_token_id in output_ids:
+                    output_ids = output_ids[:output_ids.index(eor_token_id) + 1]  # Include EOR token
+                else:
+                    # Check for similar markers like '<E' and handle them
+                    try:
+                        eor_index = next(i for i, token in enumerate(output_ids) if decode([token]).startswith('<E'))
+                        output_ids = output_ids[:eor_index]
+                    except StopIteration:
+                        pass
+
+                # Ensure the prompt is not included in the final output
+                output = decode(output_ids).replace(prompt, '').strip()
+                output = ensure_complete_output(output, prompt, max_new_tokens, temperature, top_k, top_p, repetition_penalty, eor_token_id)
+                truncated_output = truncate_output(output, max_new_tokens)
+                outputs.append(truncated_output)
+            return '\n\n'.join(outputs)

 # Create a Gradio interface
+demo = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your prompt here...", value="Write a short story about a boy:"),
+        gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Samples"),
+        gr.Slider(minimum=10, maximum=200, step=1, value=75, label="Max New Tokens"),
+        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.8, label="Temperature"),
+        gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Top-k"),
+        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.85, label="Top-p"),
+        gr.Slider(minimum=1.0, maximum=2.0, step=0.1, value=1.1, label="Repetition Penalty"),
+        gr.Number(value=100264, precision=0, label="End-of-Response Token ID")
+    ],
+    outputs="text",
+    title="GPT Text Generator",
+    description="Generate text based on a prompt using a trained GPT model.",
+    # Each example row must supply a value for every input component
+    examples=[
+        ["Write a short story about a boy:", 1, 75, 0.8, 50, 0.85, 1.1, 100264],
+        ["Explain the theory of relativity:", 1, 75, 0.8, 50, 0.85, 1.1, 100264],
+        ["What is the meaning of life?", 1, 75, 0.8, 50, 0.85, 1.1, 100264]
+    ]
+)

 # Launch the Gradio app
 demo.launch()
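For reference, the extended encoding defined above can be sanity-checked in isolation. This is a minimal sketch assuming only that tiktoken is installed; the "<EOR>" id 100264 is taken directly from the diff:

import tiktoken

cl100k_base = tiktoken.get_encoding("cl100k_base")
enc = tiktoken.Encoding(
    name="cl100k_im",
    pat_str=cl100k_base._pat_str,
    mergeable_ranks=cl100k_base._mergeable_ranks,
    special_tokens={**cl100k_base._special_tokens, "<EOR>": 100264, "<SPECIAL2>": 100265},
)

# Special text must be listed in allowed_special, otherwise encode() raises a ValueError
ids = enc.encode("hello<EOR>", allowed_special={"<EOR>"})
assert ids[-1] == 100264
assert enc.decode(ids) == "hello<EOR>"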
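The new app.py imports GPT and GPTConfig but never uses them, loading a fully pickled module with torch.load instead. If the checkpoint were a nanoGPT-style dict holding model args and a state_dict (an assumption; the actual layout of latest_model.pt is not shown in the diff), loading would look roughly like the sketch below. Note that PyTorch 2.6+ defaults torch.load to weights_only=True, which rejects pickled modules:

import torch
from model import GPT, GPTConfig  # provided by the Space's model.py

checkpoint = torch.load('latest_model.pt', map_location='cpu', weights_only=False)
if isinstance(checkpoint, dict) and 'model' in checkpoint:
    # hypothetical nanoGPT-style layout: config kwargs plus a state_dict
    model = GPT(GPTConfig(**checkpoint['model_args']))
    model.load_state_dict(checkpoint['model'])
else:
    # full pickled module, as app.py currently assumes
    model = checkpoint
model.eval()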
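app.py also calls model.generate with top_p, repetition_penalty, and eor_token_id keyword arguments, which stock nanoGPT's generate does not accept, so model.py must define an extended version. A hypothetical sketch of such a method, assuming a nanoGPT-style model (forward returning (logits, loss), a config.block_size, and batch size 1 as used here); illustrative only, not the Space's actual implementation:

import torch
import torch.nn.functional as F

@torch.no_grad()
def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None,
             top_p=None, repetition_penalty=1.0, eor_token_id=None):
    for _ in range(max_new_tokens):
        # Crop the context to the model's block size
        idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
        logits, _ = self(idx_cond)
        logits = logits[:, -1, :] / temperature
        # Repetition penalty (CTRL-style): damp logits of tokens already generated
        if repetition_penalty != 1.0:
            for token in set(idx[0].tolist()):
                score = logits[0, token]
                logits[0, token] = score / repetition_penalty if score > 0 else score * repetition_penalty
        # Top-k: keep only the k most likely tokens
        if top_k is not None:
            v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
            logits[logits < v[:, [-1]]] = -float('inf')
        # Top-p (nucleus): keep the smallest set of tokens whose mass exceeds top_p
        if top_p is not None:
            sorted_logits, sorted_idx = torch.sort(logits, descending=True)
            probs = F.softmax(sorted_logits, dim=-1)
            cumulative = torch.cumsum(probs, dim=-1)
            sorted_logits[cumulative - probs > top_p] = -float('inf')
            logits = logits.scatter(-1, sorted_idx, sorted_logits)
        probs = F.softmax(logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)
        idx = torch.cat((idx, next_token), dim=1)
        # Stop early at the end-of-response token (batch size 1 assumed)
        if eor_token_id is not None and next_token.item() == int(eor_token_id):
            break
    return idx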