"""Gradio chat front-end that forwards each user message to a remote webhook.

The webhook is sent a translation-style payload (``src_lang``/``tgt_lang``
parameters plus the user's text) and the ``translation_text`` field of its JSON
reply is shown as the chat answer.
"""

import json

import gradio as gr
import requests

# NOTE: a commented-out local-inference path (transformers tokenizer/model for
# "togethercomputer/RedPajama-INCITE-Chat-3B-v1" with threaded token streaming)
# used to live in this file. It was dead code and has been removed; the remote
# webhook call in ``predict`` is the only live backend.

# NOTE(review): the webhook URL is hard-coded in source. If it is meant to be
# private, consider loading it from configuration instead.
API_URL = "https://hook.us1.make.com/z7iqjks2oo1fa5u2ntqu4ggsnepdjhal"

# The header specifies that we're sending JSON data.
REQUEST_HEADERS = {"Content-Type": "application/json"}

# Language codes sent with every request (presumably English -> French;
# confirm against the service behind the webhook).
REQUEST_PARAMETERS = {"src_lang": "en_XX", "tgt_lang": "fr_XX"}

# Upper bound, in seconds, on how long a single webhook call may take.
REQUEST_TIMEOUT_SECONDS = 200

FALLBACK_REPLY = "Sorry I could not answer your question as something went wrong"
NO_TRANSLATION_REPLY = "No translation found."


def _extract_translation(response_json):
    """Return the ``translation_text`` value from a decoded webhook reply.

    The expected shape is a non-empty list whose first element is a dict. A
    bare dict is also accepted. Any other shape (empty list, string, number,
    ``None``, list of non-dicts) yields ``NO_TRANSLATION_REPLY`` instead of
    raising.
    """
    candidate = response_json
    if isinstance(response_json, list):
        candidate = response_json[0] if response_json else {}
    if not isinstance(candidate, dict):
        return NO_TRANSLATION_REPLY
    return candidate.get("translation_text", NO_TRANSLATION_REPLY)


def predict(message, history):
    """Send ``message`` to the webhook and return the text to show in the chat.

    Args:
        message: The text the user just submitted.
        history: Previous turns of the conversation. It is accepted because
            the chat interface passes it to the callback, but it is not sent
            to the webhook.

    Returns:
        The ``translation_text`` from the webhook reply, ``"No translation
        found."`` when the reply has no such field, or a generic apology when
        the request fails, returns a non-200 status, or is not valid JSON.
    """
    payload = {
        "parameters": dict(REQUEST_PARAMETERS),
        "inputs": message,
    }

    try:
        response = requests.post(
            API_URL,
            headers=REQUEST_HEADERS,
            data=json.dumps(payload),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        # Timeouts, DNS failures, refused connections, etc. must not crash the
        # chat callback; degrade to the same apology used for HTTP errors.
        print(f"Request failed before a response was received: {exc}")
        return FALLBACK_REPLY

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}: {response.text}")
        return FALLBACK_REPLY

    try:
        response_json = response.json()
    except ValueError as exc:
        # A 200 reply with a non-JSON body would otherwise raise here.
        print(f"Response body was not valid JSON: {exc}")
        return FALLBACK_REPLY

    # Echo the raw reply to stdout to help diagnose unexpected payload shapes.
    print(response_json)
    return _extract_translation(response_json)


demo = gr.ChatInterface(predict)

if __name__ == "__main__":
    demo.launch()