File size: 4,369 Bytes
26fd6a5
 
 
 
 
 
 
 
 
 
 
 
a224a43
 
26fd6a5
a224a43
 
 
69ce671
26fd6a5
 
 
 
 
 
 
 
69ce671
 
 
26fd6a5
 
 
 
 
 
 
90deb69
 
26fd6a5
90deb69
 
 
26fd6a5
 
 
 
 
 
 
 
69ce671
26fd6a5
69ce671
 
 
 
 
26fd6a5
69ce671
26fd6a5
69ce671
 
 
26fd6a5
69ce671
26fd6a5
69ce671
 
26fd6a5
69ce671
 
26fd6a5
 
69ce671
26fd6a5
 
69ce671
26fd6a5
 
a224a43
 
 
69ce671
 
 
 
 
 
 
4bd2a98
 
69ce671
 
 
 
4bd2a98
 
69ce671
4bd2a98
 
 
 
69ce671
 
4bd2a98
69ce671
 
a224a43
4bd2a98
 
 
 
69ce671
a224a43
4bd2a98
 
a224a43
 
 
4bd2a98
 
 
 
 
a224a43
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# -*- coding: utf-8 -*-
"""translation practice.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1KrnodZGBZrUFdaJ9FIn8IhtWtCL7peoE
"""
import os

import gradio as gr
import pandas as pd
import requests
import spacy
from dotenv import load_dotenv
from langchain.schema import AIMessage, HumanMessage, SystemMessage
#from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI
#from openai import OpenAI
from langchain_openai import OpenAI

# Load environment variables from .env file.
# This has to run before the os.getenv() lookup below so that values defined
# in .env are visible to it.
load_dotenv()

# Hugging Face access token read from the environment (None when unset).
HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')

# openai setup
# client = OpenAI(
#   api_key=os.getenv('OPENAI_API_KEY')
# )

# Hugging Face Inference API setup. No model name is appended to the base
# URL here; the commented-out lines show the earlier per-model form.
#model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
API_URL = "https://api-inference.huggingface.co/models/"
#API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
# NOTE: when HF_TOKEN is unset the header value is the literal "Bearer None".
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

# Global variable to control debug printing
DEBUG_MODE = True

def debug_print(*args, **kwargs):
    """Forward all arguments to ``print`` only while ``DEBUG_MODE`` is truthy."""
    if not DEBUG_MODE:
        return
    print(*args, **kwargs)

# Lazily-initialised spaCy pipeline shared by every split_sentences_ginza call.
_JA_NLP = None


def _get_japanese_nlp():
    """Return the spaCy Japanese pipeline, loading it on first use only."""
    global _JA_NLP
    if _JA_NLP is None:
        # Loading a pipeline reads the model from disk, so do it once
        # instead of on every call.
        _JA_NLP = spacy.load("ja_core_news_sm")
    return _JA_NLP


def split_sentences_ginza(input_text):
    """Split ``input_text`` into sentences with spaCy.

    Despite the function name, the pipeline loaded is spaCy's
    ``ja_core_news_sm`` model.

    Args:
        input_text: Text to segment.

    Returns:
        A list with the text of each sentence, in document order.
    """
    doc = _get_japanese_nlp()(input_text)
    return [sent.text for sent in doc.sents]


# CSV file the practice sentence pairs are read from.
file_path = 'anki_japanese_english_pairs.csv'


def load_csv(file_path):
    """Read the CSV at ``file_path`` with pandas and return the DataFrame."""
    return pd.read_csv(file_path)

def get_sentence_pair(df):
    """Pick one random row of ``df`` and return its first two cells as strings.

    The first column is treated as the Japanese sentence and the second as
    the English one. Both values are passed through ``str`` and echoed via
    ``debug_print`` before being returned.

    Returns:
        A ``(japanese_sentence, english_sentence)`` tuple of strings.
    """
    picked = df.sample(1)
    jp_text, en_text = (str(picked.iloc[0, col]) for col in (0, 1))

    debug_print("### Japanese sentence:", jp_text)
    debug_print("### English sentence:", en_text)

    return jp_text, en_text


# Pick the practice sentence pair once, when the module is executed.
# predict() reads these module-level names on every call, so the same pair is
# used until the script is restarted.
japanese_sentence, english_sentence = get_sentence_pair(load_csv(file_path))


# Chat model that predict() sends the conversation to.
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')

def predict(message, history):
    """Generate the tutor's reply to the student's latest chat message.

    The conversation sent to the chat model is built as: the tutoring
    instructions (which embed the module-level ``japanese_sentence`` and
    ``english_sentence``) as the system message, then the earlier turns from
    ``history``, then ``message`` as the newest human turn.

    Args:
        message: The student's latest input text.
        history: Iterable of ``(human, ai)`` pairs for the earlier turns.
            Either element may be ``None`` (the welcome message, for example,
            has no human part); ``None`` entries are skipped.

    Returns:
        The ``content`` of the model's response.
    """
    # Define your initial setup prompt here
    initial_setup = f'''
    Japanese students are learning to translate Japanese text to English text. They will be given a Japanese sentence to translate, and will provide an English translation attempt. 
    Based on the feedback you provide, they will revise their translation. This process will continue until their translation is accurate. 

    Encourage the student by specifying the strengths of their writing.
    DO NOT PROVIDE THE CORRECT ENGLISH TRANSLATION until the student gets the correct translation. Let the student work it out.
    Provide your feedback as a list in the format: a, b, c etc.
    
    Execute the following tasks step by step:
    1. Ask the student to translate the following sentence from Japanese to English: {japanese_sentence}. Here is the English translation for reference: {english_sentence}
    2. Suggest only mechanical corrections (i.e., spelling, grammar, and punctuation) for the student. Ask for another translation attempt.

    Start by asking the student to translate the Japanese sentence.
    '''

    # removed from prompt
    # The student's translation need not match the provided English translation exactly, but it should be accurate to the Japanese text.

    # The setup prompt is an instruction to the model, not something the
    # assistant said, so it goes in as a SystemMessage.
    history_langchain_format = [SystemMessage(content=initial_setup)]

    for human, ai in history:
        if human is not None:
            history_langchain_format.append(HumanMessage(content=human))
        if ai is not None:
            history_langchain_format.append(AIMessage(content=ai))

    history_langchain_format.append(HumanMessage(content=message))

    debug_print("### Full history: ", history_langchain_format)
    # invoke() replaces the deprecated direct call on the model object.
    gpt_response = llm.invoke(history_langchain_format)
    return gpt_response.content

# Greeting pre-loaded into the chat window as an assistant-only turn.
welcome_message = "Hi! 👋. Are you ready to practise translation?"

# Chat UI: each user message is handled by predict().
app = gr.ChatInterface(
    fn=predict,
    title="Translation Chatbot",
    chatbot=gr.Chatbot(value=[(None, welcome_message)]),
)

app.launch()