"""Gradio chat demo that serves the FINGU-AI/Qwen-Orpo-v1 model on a GPU Space."""

import os
import random
import re
import time

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer

# Read the Hugging Face access token from the environment, if one is set.
# NOTE(review): the token is not passed to `from_pretrained` below — confirm
# whether the model repository needs it.
HF_TOKEN = os.environ.get("HF_TOKEN", None)

zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔

model_id = 'FINGU-AI/Qwen-Orpo-v1'

# attn_implementation="flash_attention_2" is a possible alternative to "sdpa".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    attn_implementation="sdpa",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model.to('cuda')

# Sampling settings forwarded verbatim to `model.generate`.
generation_params = {
    'max_new_tokens': 1000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
}


@spaces.GPU
def inference(query: str) -> str:
    """Generate the assistant's reply to a single user query.

    The query is wrapped in a fixed system + user chat, rendered with the
    tokenizer's chat template, and passed to ``model.generate`` using
    ``generation_params``.

    Args:
        query: The user's message.

    Returns:
        The decoded text of the newly generated tokens only, with
        surrounding whitespace stripped.
    """
    messages = [
        {"role": "system", "content": """You are ai trader, invester helpfull assistant."""},
        {"role": "user", "content": f"{query}"},
    ]

    # Assumes apply_chat_template returns a (1, prompt_len) tensor of input
    # ids here, as the direct hand-off to `generate` requires — TODO confirm
    # against the installed transformers version.
    tokenized_chat = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    outputs = model.generate(tokenized_chat, **generation_params)

    # The generated sequence starts with the prompt ids. Cut at the prompt's
    # token length so only the reply is decoded; the earlier code sliced the
    # decoded string by len(messages) (the message count), which left the
    # prompt text in the response.
    prompt_length = tokenized_chat.shape[-1]
    reply_ids = outputs[0][prompt_length:]
    reply = tokenizer.decode(reply_ids, skip_special_tokens=True)

    # Defensive: drop an end-of-turn marker in case the tokenizer does not
    # treat it as a special token.
    return reply.replace('<|im_end|>', "").strip()


examples = [
    'How can options strategies such as straddles, strangles, and spreads be used to hedge against market volatility?',
    'How do changes in interest rates, inflation, and GDP growth impact stock and bond markets?',
    'What are the key components and strategies involved in developing an effective algorithmic trading system?',
    'How can investors integrate environmental, social, and governance (ESG) factors into their investment decisions to achieve both financial returns and social impact?',
    'How do geopolitical events such as trade wars, political instability, and international conflicts affect global financial markets?',
    'How does blockchain technology have the potential to disrupt financial markets and investment practices?',
]


def response(message: str, history):
    """Yield the reply to ``message`` one character at a time.

    The full reply is produced up front by ``inference``; the growing
    prefixes are then yielded with a short pause between them so the chat UI
    shows a typing effect.

    Args:
        message: The latest user message from the chat UI.
        history: Earlier conversation turns supplied by the chat UI; unused.

    Yields:
        Successively longer prefixes of the reply, ending with the full text.
    """
    text = inference(message)
    for end in range(1, len(text) + 1):
        time.sleep(0.01)
        yield text[:end]


gr.ChatInterface(response, examples=examples).launch()