# -*- coding: utf-8 -*-
"""translation practice.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1KrnodZGBZrUFdaJ9FIn8IhtWtCL7peoE
"""
import os

import requests
import gradio as gr
import spacy
from dotenv import load_dotenv
from openai import OpenAI  # the OpenAI SDK client; client.chat.completions below needs this, not langchain_openai's OpenAI wrapper
from langchain_openai import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage

# Load environment variables from the .env file
load_dotenv()

# Access the environment variables
HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')

# OpenAI client setup
client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY')
)

# Hugging Face Inference API setup
#model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
API_URL = "https://api-inference.huggingface.co/models/"
#API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
# Global variable to control debug printing
DEBUG_MODE = True

def debug_print(*args, **kwargs):
    if DEBUG_MODE:
        print(*args, **kwargs)

def split_sentences_ginza(input_text):
    # Despite the name, this loads spaCy's ja_core_news_sm model rather than
    # GiNZA's ja_ginza; reloading the model on every call is simple but slow.
    nlp = spacy.load("ja_core_news_sm")
    doc = nlp(input_text)
    sentences = [sent.text for sent in doc.sents]
    return sentences
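
# A minimal usage sketch (assumes the model has been installed via
# `python -m spacy download ja_core_news_sm`); the segmenter splits on the
# Japanese full stop, so:
#   split_sentences_ginza("こんにちは。元気ですか。")
#   -> ["こんにちは。", "元気ですか。"]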

def query_hf(payload, model_name):
    # HTTP POST request to the Inference API endpoint for the given model
    response = requests.post(API_URL + model_name, headers=headers, json=payload)
    return response.json()
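
# For translation models such as opus-mt, the Inference API typically returns a
# list of dicts, e.g. [{"translation_text": "..."}]; translate_hf below relies
# on that shape. On failure the API instead returns a dict with an "error" key,
# which this code does not handle.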

def translate_hf(input_text):
    debug_print("Translating... ", input_text)
    sentences = split_sentences_ginza(input_text)  # split into sentences
    translated_sentences = []
    debug_print("Split sentences... ", sentences)
    for sentence in sentences:
        if sentence.strip():  # ensure the sentence is not empty
            # API request for each sentence
            response = query_hf({
                "inputs": sentence.strip(),
                "options": {"wait_for_model": True}
            }, "Helsinki-NLP/opus-mt-ja-en")
            debug_print("response: ", response)
            translated_sentence = response[0]["translation_text"]
            translated_sentences.append(translated_sentence)
    # Join the translated sentences
    translation = ' '.join(translated_sentences)
    return translation
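
# Hypothetical end-to-end example (requires network access and a valid HF_TOKEN):
#   translate_hf("こんにちは。元気ですか。")
#   might return "Hello. How are you?" (exact wording depends on the model)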

def translate_openai(input_text):
    prompt = "Translate the following text into Japanese: " + input_text
    response = client.chat.completions.create(  # get translation from GPT
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="gpt-3.5-turbo",
        temperature=0  # deterministic, so it should be the same translation every time
    )
    translation = response.choices[0].message.content
    debug_print("GPT translation:", translation)
    return translation
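
# Hypothetical example: translate_openai("Good morning") might return
# "おはようございます" (temperature=0 keeps repeated calls consistent, though
# outputs can still vary across model versions).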
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')

def predict(message, history):
    history_langchain_format = []
    for human, ai in history:  # history is a list of (user, assistant) string pairs; not compatible with multimodal input
        history_langchain_format.append(HumanMessage(content=human))
        history_langchain_format.append(AIMessage(content=ai))
    history_langchain_format.append(HumanMessage(content=message))
    gpt_response = llm.invoke(history_langchain_format)  # .invoke replaces the deprecated direct call llm(...)
    return gpt_response.content
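
# Shape sketch (an assumption based on Gradio's tuple-style chat history):
#   history = [("Translate 犬 for me", "dog")]
#   predict("And 猫?", history)  # sends Human/AI/Human messages to the chat model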

app = gr.ChatInterface(fn=predict, title="Translation Chatbot")  #, multimodal=True)
app.launch()