Spaces:

flowers-team
/

SocialAISchool

Running

File size: 20,604 Bytes

be5548b

# python -m scripts.LLM_test  --gif test_GPT_boxes --episodes 1 --max-steps 8 --model text-davinci-003 --env-args size 6 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_color_boxes.txt
# python -m scripts.LLM_test  --gif test_GPT_asoc --episodes 1 --max-steps 8 --model text-ada-001 --env-args size 6  --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_asocial_box.txt --feed-full-ep

# python -m scripts.LLM_test  --gif test_GPT_boxes --episodes 1 --max-steps 8 --model bloom_560m --env-args size 6 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_color_boxes.txt
# python -m scripts.LLM_test  --gif test_GPT_asoc --episodes 1 --max-steps 8 --model bloom_560m --env-args size 6  --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_asocial_box.txt --feed-full-ep

## bloom 560m
# boxes
# python -m scripts.LLM_test --log llm_log/bloom_560m_boxes_no_hist  --gif evaluation --episodes 20 --max-steps 10 --model bloom_560m --env-args size 6 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_color_boxes.txt

# asocial
# python -m scripts.LLM_test --log llm_log/bloom_560m_asocial_no_hist   --gif evaluation --episodes 20 --max-steps 10 --model bloom_560m --env-args size 6  --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_asocial_box.txt

# random
# python -m scripts.LLM_test --log llm_log/random_boxes  --gif evaluation --episodes 20 --max-steps 10 --model random --env-args size 6 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_color_boxes.txt

import argparse
import json
import requests
import time
import warnings
from n_tokens import estimate_price

import numpy as np
import torch
from pathlib import Path

from utils.babyai_utils.baby_agent import load_agent
from utils import *
from models import *
import subprocess
import os

from matplotlib import pyplot as plt

from gym_minigrid.wrappers import *
from gym_minigrid.window import Window
from datetime import datetime

from imageio import mimsave

def prompt_preprocessor(llm_prompt):
    # remove peer observations
    lines = llm_prompt.split("\n")
    new_lines = []
    for line in lines:
        if line.startswith("#"):
            continue

        elif line.startswith("Conversation"):
            continue

        elif "peer" in line:
            caretaker = True
            if caretaker:
                # show only the location of the caretaker

                # this is very ugly, todo: refactor this
                assert "there is a" in line
                start_index = line.index('there is a') + 11
                new_line = line[:start_index] + 'caretaker'

                new_lines.append(new_line)

            else:
                # no caretaker at all
                if line.startswith("Obs :") and "peer" in line:
                    # remove only the peer descriptions
                    line = "Obs :"
                    new_lines.append(line)
                else:
                    assert "peer" in line

        elif "Caretaker:" in line:
            # line = line.replace("Caretaker:", "Caretaker says: '") + "'"
            new_lines.append(line)

        else:
            new_lines.append(line)

    return "\n".join(new_lines)


# Parse arguments

parser = argparse.ArgumentParser()
parser.add_argument("--model", required=False,
                    help="text-ada-001")
parser.add_argument("--seed", type=int, default=0,
                    help="Seed of the first episode. The seed for the following episodes will be used in order: seed, seed + 1, ... seed + (n_episodes-1) (default: 0)")
parser.add_argument("--max-steps", type=int, default=5,
                    help="max num of steps")
parser.add_argument("--shift", type=int, default=0,
                    help="number of times the environment is reset at the beginning (default: 0)")
parser.add_argument("--argmax", action="store_true", default=False,
                    help="select the action with highest probability (default: False)")
parser.add_argument("--pause", type=float, default=0.5,
                    help="pause duration between two consequent actions of the agent (default: 0.5)")
parser.add_argument("--env-name", type=str,
                    # default="SocialAI-ELangColorBoxesTestInformationSeekingParamEnv-v1",
                    # default="SocialAI-AsocialBoxInformationSeekingParamEnv-v1",
                    default="SocialAI-ColorBoxesLLMCSParamEnv-v1",
                    required=False,
                    help="env name")
parser.add_argument("--in-context-path", type=str,
                    # default='llm_data/short_in_context_boxes.txt'
                    # default='llm_data/in_context_asocial_box.txt'
                    default='llm_data/in_context_color_boxes.txt',
                    required=False,
                    help="path to in context examples")
parser.add_argument("--gif", type=str, default="visualization",
                    help="store output as gif with the given filename", required=False)
parser.add_argument("--episodes", type=int, default=1,
                    help="number of episodes to visualize")
parser.add_argument("--env-args", nargs='*', default=None)
parser.add_argument("--agent_view", default=False, help="draw the agent sees (partially observable view)", action='store_true' )
parser.add_argument("--tile_size", type=int, help="size at which to render tiles", default=32 )
parser.add_argument("--mask-unobserved", default=False, help="mask cells that are not observed by the agent", action='store_true' )
parser.add_argument("--log", type=str, default="llm_log/episodes_log", help="log from the run", required=False)
parser.add_argument("--feed-full-ep", default=False, help="weather to append the whole episode to the prompt", action='store_true')
parser.add_argument("--skip-check", default=False, help="Don't estimate the price.", action="store_true")

args = parser.parse_args()

# Set seed for all randomness sources

seed(args.seed)

model = args.model


in_context_examples_path = args.in_context_path

print("env name:", args.env_name)
print("examples:", in_context_examples_path)
print("model:", args.model)

# datetime
now = datetime.now()
datetime_string = now.strftime("%d_%m_%Y_%H:%M:%S")
print(datetime_string)

# log filenames

log_folder = args.log+"_"+datetime_string+"/"
os.mkdir(log_folder)
evaluation_log_filename = log_folder+"evaluation_log.json"
prompt_log_filename = log_folder + "prompt_log.txt"
ep_h_log_filename = log_folder+"episode_history_query.txt"
gif_savename = log_folder + args.gif + ".gif"

assert "viz" not in gif_savename # don't use viz anymore


env_args = env_args_str_to_dict(args.env_args)
env = make_env(args.env_name, args.seed, env_args)

# env = gym.make(args.env_name, **env_args)
print(f"Environment {args.env_name} and args: {env_args_str_to_dict(args.env_args)}\n")

# Define agent
print("Agent loaded\n")

# prepare models

if args.model in ["text-davinci-003", "text-ada-001", "gpt-3.5-turbo-0301"]:
    import openai
    openai.api_key = os.getenv("OPENAI_API_KEY")

elif args.model in ["gpt2_large", "api_bloom"]:
    HF_TOKEN = os.getenv("HF_TOKEN")

elif args.model in ["bloom_560m"]:
    from transformers import BloomForCausalLM
    from transformers import BloomTokenizerFast

    hf_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")
    hf_model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")

elif args.model in ["bloom"]:
    from transformers import BloomForCausalLM
    from transformers import BloomTokenizerFast

    hf_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom", cache_dir=".cache/huggingface/")
    hf_model = BloomForCausalLM.from_pretrained("bigscience/bloom", cache_dir=".cache/huggingface/")


def plt_2_rgb(env):
    # data = np.frombuffer(env.window.fig.canvas.tostring_rgb(), dtype=np.uint8)
    # data = data.reshape(env.window.fig.canvas.get_width_height()[::-1] + (3,))

    width, height = env.window.fig.get_size_inches() * env.window.fig.get_dpi()
    data = np.fromstring(env.window.fig.canvas.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
    return data

def generate(text_input, model):
    # return "(a) move forward"
    if model == "dummy":
        print("dummy action forward")
        return "move forward"

    elif model == "random":
        print("random agent")
        return random.choice([
            "move forward",
            "turn left",
            "turn right",
            "toggle",
        ])

    elif model in ["gpt-3.5-turbo-0301"]:
        while True:
            try:
                c = openai.ChatCompletion.create(
                    model=model,
                    messages=[
                        # {"role": "system", "content": ""},
                        # {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
                        # {"role": "user", "content": "Continue the following text in the most logical way.\n"+text_input}
                        {"role": "user", "content": text_input}
                    ],
                    max_tokens=3,
                    n=1,
                    temperature=0,
                    request_timeout=30,
                )
                break
            except Exception as e:
                print(e)
                print("Pausing")
                time.sleep(10)
                continue
        print("generation: ", c['choices'][0]['message']['content'])
        return c['choices'][0]['message']['content']

    elif model in ["text-davinci-003", "text-ada-001"]:
        while True:
            try:
                response = openai.Completion.create(
                    model=model,
                    prompt=text_input,
                    # temperature=0.7,
                    temperature=0.0,
                    max_tokens=3,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                    timeout=30
                )
                break

            except Exception as e:
                print(e)
                print("Pausing")
                time.sleep(10)
                continue

        choices = response["choices"]
        assert len(choices) == 1
        return choices[0]["text"].strip().lower()  # remove newline from the end

    elif model in ["gpt2_large", "api_bloom"]:
        # HF_TOKEN = os.getenv("HF_TOKEN")
        if model == "gpt2_large":
            API_URL = "https://api-inference.huggingface.co/models/gpt2-large"

        elif model == "api_bloom":
            API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"

        else:
            raise ValueError(f"Undefined model {model}.")

        headers = {"Authorization": f"Bearer {HF_TOKEN}"}

        def query(text_prompt, n_tokens=3):

            input = text_prompt

            # make n_tokens request and append the output each time - one request generates one token

            for _ in range(n_tokens):
                # prepare request
                payload = {
                    "inputs": input,
                    "parameters": {
                        "do_sample": False,
                        'temperature': 0,
                        'wait_for_model': True,
                        # "max_length": 500,  # for gpt2
                        # "max_new_tokens": 250  # fot gpt2-xl
                    },
                }
                data = json.dumps(payload)

                # request
                response = requests.request("POST", API_URL, headers=headers, data=data)
                response_json = json.loads(response.content.decode("utf-8"))

                if type(response_json) is list and len(response_json) == 1:
                    # generated_text contains the input + the response
                    response_full_text = response_json[0]['generated_text']

                    # we use this as the next input
                    input = response_full_text

                else:
                    print("Invalid request to huggingface api")
                    from IPython import embed; embed()

            # remove the prompt from the beginning
            assert response_full_text.startswith(text_prompt)
            response_text = response_full_text[len(text_prompt):]

            return response_text

        response = query(text_input).strip().lower()
        return response

    elif model in ["bloom_560m"]:
        # from transformers import BloomForCausalLM
        # from transformers import BloomTokenizerFast
        #
        # tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")
        # model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")

        inputs = hf_tokenizer(text_input, return_tensors="pt")
        # 3 words
        result_length = inputs['input_ids'].shape[-1]+3
        full_output = hf_tokenizer.decode(hf_model.generate(inputs["input_ids"], max_length=result_length)[0])

        assert full_output.startswith(text_input)
        response = full_output[len(text_input):]

        response = response.strip().lower()

        return response

    else:
        raise ValueError("Unknown model.")

def get_parsed_action(text_action):
    if "move forward" in text_action:
        return "move forward"

    elif "turn left" in text_action:
        return "turn left"

    elif "turn right" in text_action:
        return "turn right"

    elif "toggle" in text_action:
        return "toggle"

    elif "no_op" in text_action:
        return "no_op"
    else:
        warnings.warn(f"Undefined action {text_action}")
        return "no_op"


def step(text_action):
    text_action = get_parsed_action(text_action)

    if "move forward" == text_action:
        action = [int(env.actions.forward), np.nan, np.nan]

    elif "turn left" == text_action:
        action = [int(env.actions.left), np.nan, np.nan]

    elif "turn right" == text_action:
        action = [int(env.actions.right), np.nan, np.nan]

    elif "toggle" == text_action:
        action = [int(env.actions.toggle), np.nan, np.nan]

    elif "no_op" == text_action:
        action = [np.nan, np.nan, np.nan]

    # if text_action.startswith("a"):
    #     action = [int(env.actions.forward), np.nan, np.nan]
    #
    # elif text_action.startswith("b"):
    #     action = [int(env.actions.left), np.nan, np.nan]
    #
    # elif text_action.startswith("c"):
    #     action = [int(env.actions.right), np.nan, np.nan]
    #
    # elif text_action.startswith("d"):
    #     action = [int(env.actions.toggle), np.nan, np.nan]
    #
    # elif text_action.startswith("e"):
    #     action = [np.nan, np.nan, np.nan]
    #
    # else:
    #     print("Unknown action.")

    obs, reward, done, info = env.step(action)

    return obs, reward, done, info



def reset(env):
    env.reset()
    # a dirty trick just to get obs and info
    return step("no_op")


def generate_text_obs(obs, info):
    llm_prompt = "Obs : "
    llm_prompt += "".join(info["descriptions"])
    if obs["utterance_history"] != "Conversation: \n":
        utt_hist = obs['utterance_history']
        utt_hist = utt_hist.replace("Conversation: \n","")
        llm_prompt += utt_hist

    return llm_prompt


def action_query():
    # llm_prompt = ""
    # llm_prompt += "Your possible actions are:\n"
    # llm_prompt += "(a) move forward\n"
    # llm_prompt += "(b) turn left\n"
    # llm_prompt += "(c) turn right\n"
    # llm_prompt += "(d) toggle\n"
    # llm_prompt += "(e) no_op\n"
    # llm_prompt += "Your next action is: ("
    llm_prompt = "Act :"
    return llm_prompt

# lod context examples
with open(in_context_examples_path, "r") as f:
    in_context_examples = f.read()

with open(prompt_log_filename, "a+") as f:
    f.write(datetime_string)

with open(ep_h_log_filename, "a+") as f:
    f.write(datetime_string)

feed_episode_history = args.feed_full_ep

# asoc
in_context_n_tokens = 800
ep_obs_len = 50 * 3

# color
in_context_n_tokens = 1434
# ep_obs_len = 70

# feed only current obs
if feed_episode_history:
    ep_obs_len = 50

else:
    # last_n = 1
    # last_n = 2
    last_n = 3
    ep_obs_len = 50 * last_n

_, price = estimate_price(
    num_of_episodes=args.episodes,
    in_context_len=in_context_n_tokens,
    ep_obs_len=ep_obs_len,
    n_steps=args.max_steps,
    model=args.model,
    feed_episode_history=feed_episode_history
)
if not args.skip_check:
    input(f"You will spend: {price} dollars. (in context: {in_context_n_tokens} obs: {ep_obs_len}), ok?")

# prepare frames list to save to gif
frames = []

assert args.max_steps <= 20

success_rates = []
# episodes start
for episode in range(args.episodes):
    print("Episode:", episode)
    new_episode_text = "New episode.\n"
    episode_history_text = new_episode_text

    success = False
    episode_seed = args.seed + episode
    env = make_env(args.env_name, episode_seed, env_args)

    with open(prompt_log_filename, "a+") as f:
        f.write("\n\n")

    observations = []
    actions = []
    for i in range(int(args.max_steps)):
        if i == 0:
            obs, reward, done, info = reset(env)
            action_text = ""

        else:
            with open(prompt_log_filename, "a+") as f:
                f.write("\nnew prompt: -----------------------------------\n")
                f.write(llm_prompt)

            text_action = generate(llm_prompt, args.model)
            obs, reward, done, info = step(text_action)
            action_text = f"Act : {get_parsed_action(text_action)}\n"
            actions.append(action_text)

            print(action_text)

        text_obs = generate_text_obs(obs, info)
        observations.append(text_obs)
        print(prompt_preprocessor(text_obs))

        # feed the full episode history
        episode_history_text += prompt_preprocessor(action_text + text_obs)  # append to history of this episode

        if feed_episode_history:
            # feed full episode history
            llm_prompt = in_context_examples + episode_history_text + action_query()

        else:
            n = min(last_n, len(observations))
            obs = observations[-n:]
            act = (actions + [action_query()])[-n:]

            episode_text = "".join([o+a for o,a in zip(obs, act)])

            llm_prompt = in_context_examples + new_episode_text + episode_text

        llm_prompt = prompt_preprocessor(llm_prompt)


        # save the image
        env.render(mode="human")
        rgb_img = plt_2_rgb(env)
        frames.append(rgb_img)

        if env.current_env.box.blocked and not env.current_env.box.is_open:
            # target box is blocked -> apple can't be obtained
            # break to save compute
            break

        if done:
            # quadruple last frame to pause between episodes
            for i in range(3):
                same_img = np.copy(rgb_img)
                # toggle a pixel between frames to avoid cropping when going from gif to mp4
                same_img[0, 0, 2] = 0 if (i % 2) == 0 else 255
                frames.append(same_img)

            if reward > 0:
                print("Success!")
                episode_history_text += "Success!\n"
                success = True
            else:
                episode_history_text += "Failure!\n"

            with open(ep_h_log_filename, "a+") as f:
                f.write("\nnew prompt: -----------------------------------\n")
                f.write(episode_history_text)

            break

        else:
            with open(ep_h_log_filename, "a+") as f:
                f.write("\nnew prompt: -----------------------------------\n")
                f.write(episode_history_text)

    print(f"{'Success' if success else 'Failure'}")
    success_rates.append(success)

mean_success_rate =  np.mean(success_rates)
print("Success rate:", mean_success_rate)
print(f"Saving gif to {gif_savename}.")
mimsave(gif_savename, frames, duration=args.pause)

print("Done.")

log_data_dict = vars(args)
log_data_dict["success_rates"] = success_rates
log_data_dict["mean_success_rate"] = mean_success_rate

print("Evaluation log: ", evaluation_log_filename)
with open(evaluation_log_filename, "w") as f:
    f.write(json.dumps(log_data_dict))