Spaces:

LuisAVasquez
/

LLMs_for_Art_Commentary

Sleeping

File size: 9,171 Bytes

4859d06



# emotion detection with artemis
# given a CSV with image filepaths in a column 'image_file',
# append a column 'grounding_emotion' and a column 'emotions' with the detected emotion.
# 'emotions' contains the detected emotions ordered by descending probability.
# 'grounding_emotion' contains the main emotion (the most probable one)

import pandas as pd
import numpy as np
from PIL import Image
import torch
from tqdm import tqdm


#### checkpoints
def save_checkpoint(checkpoint_path, df, grounding_emo_list, emotions_list):
    """Write a partial result table to ``checkpoint_path`` as CSV.

    The rows of ``df`` are written together with two extra columns,
    'grounding_emotion' and 'emotions', filled from the given sequences
    (which must have one entry per row of ``df``). The frame passed in is
    not modified; the columns are attached to a copy.
    """
    snapshot = df.assign(
        grounding_emotion=grounding_emo_list,
        emotions=emotions_list,
    )
    # index=False keeps the DataFrame index out of the file
    snapshot.to_csv(checkpoint_path, index=False)
    print("Saved checkpoint!")

def load_checkpoint_grounding_emotion(checkpoint_path):
    """Load the image -> grounding-emotion cache from a checkpoint CSV.

    :param checkpoint_path: CSV file with the columns 'image_file' and
        'grounding_emotion'.
    :return: dict mapping each 'image_file' value to its
        'grounding_emotion' value, exactly as pandas read them. If the
        checkpoint cannot be read (missing file, missing column, parse
        error, ...), the module-level ``cached_grounding_emotion_dict``
        is returned instead, so a first run simply starts uncached.
    """
    print("reading checkpoint at ", checkpoint_path)
    try:
        df = pd.read_csv(checkpoint_path)
        # Column-wise zip instead of a per-row iterrows() loop; on duplicate
        # image paths the last row wins, as before.
        cached_grounding_emotion = dict(zip(df['image_file'], df['grounding_emotion']))
    except Exception as err:
        # Best effort by design, but no longer a bare ``except``: Ctrl-C and
        # SystemExit propagate, and the reason for the miss is reported.
        print(f"Checkpoint was not loaded ({err!r})")
        return cached_grounding_emotion_dict
    print(f"Checkpoint loaded succesfully to cache: {len(cached_grounding_emotion)} processed files")
    return cached_grounding_emotion

def load_checkpoint_emotions(checkpoint_path):
    """Load the image -> ranked-emotions cache from a checkpoint CSV.

    :param checkpoint_path: CSV file with the columns 'image_file' and
        'emotions'.
    :return: dict mapping each 'image_file' value to its 'emotions' cell.
        No parsing is applied here: values are whatever pandas read from
        the CSV. If the checkpoint cannot be read (missing file, missing
        column, parse error, ...), the module-level ``cached_emotions_dict``
        is returned instead, so a first run simply starts uncached.
    """
    print("reading checkpoint at ", checkpoint_path)
    try:
        df = pd.read_csv(checkpoint_path)
        # Column-wise zip instead of a per-row iterrows() loop; on duplicate
        # image paths the last row wins, as before.
        cached_emotions = dict(zip(df['image_file'], df['emotions']))
    except Exception as err:
        # Best effort by design, but no longer a bare ``except``: Ctrl-C and
        # SystemExit propagate, and the reason for the miss is reported.
        print(f"Checkpoint was not loaded ({err!r})")
        return cached_emotions_dict
    print(f"Checkpoint loaded succesfully to cache: {len(cached_emotions)} processed files")
    return cached_emotions

def get_checkpoint_path(output_path):
    """Return the location where checkpoints are written for ``output_path``.

    Checkpoints currently share the output file itself, so the path is
    handed back unchanged. (A disabled earlier variant derived a separate
    "checkpoint"-prefixed file in the same directory as the output.)
    """
    checkpoint_path = output_path
    return checkpoint_path



# Module-level caches keyed by image file path. They are also what the
# load_checkpoint_* functions hand back when a checkpoint cannot be read.
cached_grounding_emotion_dict = {} # to avoid recomputing
cached_emotions_dict = {} # to avoid recomputing

def get_all_emotions(filepath, model, cached_grounding_emotion_dict, cached_emotions_dict):
    """Return ``(grounding_emotion, emotions)`` for an image, using the caches.

    :param filepath: path of the image; also the cache key.
    :param model: image-to-emotion model, only used on a cache miss.
    :param cached_grounding_emotion_dict: filepath -> main emotion (updated
        in place on a miss).
    :param cached_emotions_dict: filepath -> ranked emotions (updated in
        place on a miss).
    :return: tuple ``(grounding_emotion, emotions)``.
    """
    emotions = cached_emotions_dict.get(filepath)
    grounding_emotion = cached_grounding_emotion_dict.get(filepath)

    # Only a complete hit is served from the cache. Checking ``emotions``
    # alone would return ``None`` as grounding emotion whenever the two
    # caches are out of sync (e.g. a checkpoint lacking one of the columns).
    if emotions is not None and grounding_emotion is not None:
        return grounding_emotion, emotions

    grounding_emotion, emotions = get_all_emotions_in_image(filepath, model)
    cached_grounding_emotion_dict[filepath] = grounding_emotion
    cached_emotions_dict[filepath] = emotions
    return grounding_emotion, emotions

def get_all_emotions_in_image(filepath, model):
    """Run the emotion model on one image and rank the emotions.

    :param filepath: path of the image file to analyse.
    :param model: callable taking a batch of one pre-processed image tensor;
        its output is exponentiated to obtain probabilities
        (assumes the model emits log-probabilities - TODO confirm).
    :return: tuple ``(grounding_emotion, emotions)`` where ``emotions`` is a
        list of ``(emotion_name, probability)`` pairs sorted from most to
        least likely and ``grounding_emotion`` is the name of the first one.
    """
    # Bind the context manager to the opened file itself (not to the
    # converted copy) so the underlying file handle is reliably released.
    with Image.open(filepath) as raw_img:
        rgb_img = raw_img.convert('RGB')

    # unsqueeze to add an artificial first (batch) dimension
    batch = transformation(rgb_img).unsqueeze(0)

    # Pure inference: no autograd graph needs to be recorded.
    with torch.no_grad():
        emotion_vector = model(batch)  # apply the model
    emotion_vector = np.exp(emotion_vector.detach().numpy())  # calculate probabilities

    sorted_indices = (-emotion_vector).argsort()[0]  # sort from most to least likely
    emotions = [(IDX_TO_EMOTION[ind], emotion_vector[0][ind]) for ind in sorted_indices]
    grounding_emotion = emotions[0][0]

    return grounding_emotion, emotions




################### utilities

#### Artemis emotions:
# Emotion labels; the position in this list is the class index used to
# decode the model output (see IDX_TO_EMOTION).
ARTEMIS_EMOTIONS = [
    'amusement', 'awe', 'contentment', 'excitement',
    'anger', 'disgust', 'fear', 'sadness', 'something else',
]
# Two-way lookup between class index and emotion label.
IDX_TO_EMOTION = dict(enumerate(ARTEMIS_EMOTIONS))
EMOTION_TO_IDX = {label: position for position, label in IDX_TO_EMOTION.items()}


### Artemis image preprocessing

#from artemis.in_out.neural_net_oriented import image_transformation
import torchvision.transforms as transforms
# Per-channel mean / standard deviation passed to transforms.Normalize in
# image_transformation().
# NOTE(review): the names indicate ImageNet statistics in RGB order - confirm
# these are the values the emotion model was trained with.
image_net_mean = [0.485, 0.456, 0.406]
image_net_std = [0.229, 0.224, 0.225]

def image_transformation(img_dim, lanczos=True):
    """Build the image pre-processing pipelines, keyed by dataset split.

    :param img_dim: side length of the square the image is resized to.
    :param lanczos: resample with LANCZOS when true, BILINEAR otherwise.
    :return: dict with the keys 'train', 'test', 'val' and 'rest', all
        referring to the same resize -> tensor -> normalize pipeline.
    """
    resample_method = Image.LANCZOS if lanczos else Image.BILINEAR

    pipeline = transforms.Compose([
        transforms.Resize((img_dim, img_dim), resample_method),
        transforms.ToTensor(),
        transforms.Normalize(mean=image_net_mean, std=image_net_std),
    ])

    # No data augmentation is applied for training, so every split shares
    # the very same pipeline object.
    return dict.fromkeys(('train', 'test', 'val', 'rest'), pipeline)

# Pre-processing pipeline applied to every image before it reaches the model
# (square resize to 255 px, tensor conversion, normalisation).
# NOTE(review): 255 is an unusual side length (256 would be the more common
# choice) - confirm against the resolution the model was trained with.
transformation = image_transformation(255)['train']

### Artemis load model

#from artemis.in_out.neural_net_oriented import torch_load_model
import warnings
def torch_load_model(checkpoint_file, map_location=None):
    """Load a fully pickled model with ``torch.load``, silencing warnings.

    Wraps ``torch.load`` to suppress the standard warning about not finding
    the nested implementations.

    SECURITY: this unpickles the checkpoint, which can execute arbitrary
    code. Only load checkpoint files from a trusted source.

    :param checkpoint_file: path (or file-like object) of the checkpoint.
    :param map_location: forwarded to ``torch.load`` (e.g. a CPU device).
    :return: the object stored in the checkpoint.
    """
    import inspect

    load_kwargs = {"map_location": map_location}
    # PyTorch >= 2.6 defaults to weights_only=True, which refuses fully
    # pickled modules. Request the full unpickler explicitly, but only when
    # this torch version knows the argument.
    if "weights_only" in inspect.signature(torch.load).parameters:
        load_kwargs["weights_only"] = False

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = torch.load(checkpoint_file, **load_kwargs)
    return model



########


######### performing emotion detection

def artemis_emotions_detection(input_file, output_file, model_path=None, checkpoint_every=50):
    """Detect the emotions of every image listed in ``input_file``.

    Results already present in the checkpoint (stored at the output path)
    are reused; the remaining images are run through the model, and the
    partial result table is checkpointed periodically.

    :param input_file: CSV with a column 'image_file' holding image paths.
    :param output_file: final output path; also determines where
        checkpoints are read from and written to.
    :param model_path: path of the img2emo model. When ``None`` the
        module-level ``img2emo_model_path`` set by the script entry point is
        used, which keeps existing two-argument calls working.
    :param checkpoint_every: write a checkpoint after this many newly
        computed images; a falsy value disables checkpointing.
    :return: tuple of two ``pd.Series`` aligned with the rows of the input
        CSV: the grounding (most probable) emotion and the ranked emotions.
    """
    checkpoint_path = get_checkpoint_path(output_file)
    cached_grounding_emotion_dict = load_checkpoint_grounding_emotion(checkpoint_path)
    cached_emotions_dict = load_checkpoint_emotions(checkpoint_path)

    if model_path is None:
        model_path = img2emo_model_path

    # load the model
    # GPU inference was reported as not working, so the model is pinned to CPU.
    device = torch.device("cpu")
    model = torch_load_model(model_path, map_location=device)
    # Inference only: switch dropout / batch-norm layers to evaluation mode
    # so predictions are deterministic.
    model.eval()

    recognized_grounding_emotion_per_image = []
    recognized_emotions_per_image = []
    processed_files = set(cached_emotions_dict)
    newly_processed = 0

    df = pd.read_csv(input_file)

    for idx, filepath in enumerate(tqdm(df['image_file'])):
        already_processed = filepath in processed_files

        grounding_emotion, emotions = get_all_emotions(
            filepath,
            model,
            cached_grounding_emotion_dict,
            cached_emotions_dict
        )

        recognized_grounding_emotion_per_image.append(grounding_emotion)
        recognized_emotions_per_image.append(emotions)
        processed_files.add(filepath)

        if already_processed:
            # Nothing new was computed. Checkpointing here would only rewrite
            # (or, at the very first row, truncate) the existing checkpoint.
            continue

        newly_processed += 1
        if checkpoint_every and newly_processed % checkpoint_every == 0:
            print(f"Images processed: {len(processed_files)}")
            # Rows 0..idx all have a result at this point.
            save_checkpoint(
                checkpoint_path,
                df.iloc[:idx + 1],
                recognized_grounding_emotion_per_image,
                recognized_emotions_per_image
            )

    recognized_grounding_emotion_per_image = pd.Series(recognized_grounding_emotion_per_image)
    recognized_emotions_per_image = pd.Series(recognized_emotions_per_image)

    return recognized_grounding_emotion_per_image, recognized_emotions_per_image

    
import argparse



if __name__ == "__main__":
    # Command-line entry point: read the image list, detect the emotions and
    # write the input table extended by the two result columns.

    parser = argparse.ArgumentParser(prog="Artemis emotion detection",
                                     description='Recognizes the emotions per image in an image list')

    # All three options are mandatory. Without required=True a missing option
    # surfaced later as "TypeError: 'NoneType' object is not subscriptable".
    parser.add_argument("--input_file", "-in", metavar='in', type=str, nargs=1, required=True,
                        help='input file containing images-paths for emotion detection.',
                        )
    parser.add_argument("--output_file", "-out", metavar='out', type=str, nargs=1, required=True,
                        help='output file containing images-paths + grounding (main) emotion + ranked emotions'
                        )
    parser.add_argument("--model_path", "-mp", metavar='mp', type=str, nargs=1, required=True,
                        help='artemis img2emo model path'
                        )

    args = parser.parse_args()
    # nargs=1 yields one-element lists
    input_csv_file = args.input_file[0]
    output_csv_file = args.output_file[0]

    # where is the image to emotion classifier?
    # Kept as a module-level name: artemis_emotions_detection reads it.
    img2emo_model_path = args.model_path[0]

    print(">>> input file: " , input_csv_file)
    print(">>> output file: ", output_csv_file)
    print(">>> img to emotion model path: ", img2emo_model_path)

    # perform emotion detection
    recognized_grounding_emotion_per_image, recognized_emotions_per_image = artemis_emotions_detection(input_csv_file, output_csv_file)

    # add the columns with the detected emotions
    output_df = pd.read_csv(input_csv_file)
    output_df['grounding_emotion'] = recognized_grounding_emotion_per_image
    output_df['emotions'] = recognized_emotions_per_image

    output_df.to_csv(output_csv_file,
        index= False, # don't write a new 'Index' column
    )