import json
import os
import time

import openai
import sklearn
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
# Take the API key from the OPENAI_API_KEY environment variable rather than
# hard-coding it in the source file. When the variable is unset this falls
# back to the empty string, which is the value the script used before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Few-shot demonstrations that gpt3_api() places in front of every query.
# Each demonstration is a single line of the form
# "Given the fact: <premises> Does it follow that: <conclusion> Yes or no? <answer>\n".
# The adjacent literals below are concatenated by the parser into one string.
incontext = (
    # Example 1 (answer: yes)
    "Given the fact: All Cantonese are southerners. Some Cantonese don't like chili."
    " Does it follow that: Some southerners don't like chili. Yes or no? yes\n"
    # Example 2 (answer: no)
    "Given the fact: It is difficult for cactus to survive in humid climates;"
    " citrus is difficult to grow in cold climates. In most parts of a province,"
    " at least one species is not difficult to survive and grow between cactus and citrus."
    " Does it follow that: Half of the province is humid and cold. Yes or no? no\n"
    # Example 3 (answer: no)
    "Given the fact: It is difficult for cactus to survive in humid climates;"
    " citrus is difficult to grow in cold climates. In most parts of a province,"
    " at least one species is not difficult to survive and grow between cactus and citrus."
    " Does it follow that: Most of the province is hot. Yes or no? no\n"
    # Example 4 (answer: yes)
    "Given the fact: It is difficult for cactus to survive in humid climates;"
    " citrus is difficult to grow in cold climates. In most parts of a province,"
    " at least one species is not difficult to survive and grow between cactus and citrus."
    " Does it follow that: Most of the province is either dry or warm. Yes or no? yes\n"
)
def gpt3_api(prompt):
    """Send ``prompt``, prefixed with the few-shot context, to the completion API.

    The request targets the ``text-davinci-002`` model with temperature 0,
    ``top_p`` 1.0, at most 60 generated tokens and no frequency or presence
    penalty.

    Args:
        prompt: The query text appended after the module-level ``incontext``
            demonstrations.

    Returns:
        The object returned by ``openai.Completion.create``, unmodified.
    """
    request = {
        "model": "text-davinci-002",
        "prompt": incontext + prompt,
        "temperature": 0,
        "max_tokens": 60,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    return openai.Completion.create(**request)

def _as_text(premise):
    """Return a premise as one string, joining list items with single spaces."""
    # NOTE(review): the dataset schema is not visible here. A premise that is
    # already a plain string is used as-is, because ' '.join() on a str would
    # insert a space between every character.
    if isinstance(premise, str):
        return premise
    return ' '.join(premise)


def build_prompt(line_dict):
    """Build the yes/no entailment question for one dataset record.

    Args:
        line_dict: A record with the keys ``major_premise``,
            ``minor_premise`` and ``conclusion``.

    Returns:
        The query string, in the same format as the few-shot examples but
        without the trailing answer.
    """
    maj_premise = _as_text(line_dict['major_premise'])
    min_premise = _as_text(line_dict['minor_premise'])
    hypo = line_dict['conclusion']
    return ("Given the fact: " + maj_premise + ' ' + min_premise
            + " Does it follow that: " + hypo + " Yes or no?")


def parse_prediction(completion_text):
    """Convert raw completion text into a binary label.

    The metrics compare predictions with integer gold labels, so the raw text
    must be mapped to the same label space before scoring.

    Args:
        completion_text: The text returned by the model for one query.

    Returns:
        1 if the text, ignoring leading whitespace and case, starts with
        "yes"; otherwise 0. Unparseable or empty answers therefore count as
        "not entailed".
    """
    answer = completion_text.strip().lower()
    return 1 if answer.startswith("yes") else 0


def evaluate(path, delay=5):
    """Query the model on every record of a JSON-lines file.

    Args:
        path: File with one JSON object per line; blank lines are skipped.
        delay: Seconds to sleep after each API request.

    Returns:
        A ``(y_true, y_pred)`` pair of equally long lists of 0/1 labels.
        A gold label of ``"not entailed"`` maps to 0, anything else to 1.
    """
    # Read and parse everything first so the file is not held open while the
    # (slow) API requests are running.
    with open(path, encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]

    y_true = []
    y_pred = []
    for line_dict in records:
        y_true.append(0 if line_dict['label'] == "not entailed" else 1)
        output = gpt3_api(build_prompt(line_dict))
        time.sleep(delay)  # pause between consecutive API requests
        y_pred.append(parse_prediction(output.choices[0].text))
    return y_true, y_pred


if __name__ == "__main__":
    y_true, y_pred = evaluate('test1.txt')
    print(y_true)
    print(y_pred)
    f_score = f1_score(y_true, y_pred, average='binary')
    p_score = precision_score(y_true, y_pred, average='binary')
    r_score = recall_score(y_true, y_pred, average='binary')
    acc = accuracy_score(y_true, y_pred)
    print(f_score)
    print(p_score)
    print(r_score)
    print(acc)