"""Gradio chat front-end for a Gemma causal language model.

The model is fetched once at import time. Each chat message is wrapped in a
"Question/Answer" prompt, passed to the model, and the text following the
prompt is returned to the chat UI.
"""

import gradio as gr
import keras_nlp

print("Modules loaded!")

print("Fetching model...")
# Loaded at module level so the weights are fetched once and shared by every
# call to respond().
model = keras_nlp.models.GemmaCausalLM.from_preset(
    "hf://bhashwarsengupta/gemma2-instruct-2b-en-finance"
)
print("model successfully loaded!")


def respond(
    message: str,
    history: list[tuple[str, str]]
) -> str:
    """Generate the model's answer to a single chat message.

    Args:
        message: The user's latest message; embedded verbatim in the prompt.
        history: Earlier turns supplied by ``gr.ChatInterface``. It is
            accepted to satisfy the callback signature but is not used, so
            every reply is generated from ``message`` alone.

    Returns:
        The text the model produced after the prompt.
    """
    prompt = f"Question:\n{message}\n\nAnswer:\n"

    print("Generating response...")
    output = model.generate(prompt)
    print("Response generated!")

    # The output is expected to echo the prompt before the continuation.
    # Strip that exact prefix rather than splitting on the last "Answer:\n":
    # a right-split would discard most of the reply whenever the generated
    # text itself contains "Answer:\n".
    if output.startswith(prompt):
        return output[len(prompt):]

    # Prompt was not echoed verbatim; fall back to taking whatever follows
    # the last "Answer:" marker.
    return output.rsplit("Answer:\n", 1)[-1]


demo = gr.ChatInterface(
    respond
)

if __name__ == "__main__":
    demo.launch()