gundruke committed on
Commit
584d3dc
1 Parent(s): dc9f186

added app file

app.py ADDED
@@ -0,0 +1,149 @@
+ import gradio as gr
+ import torch
+ import json
+ from nltk.corpus import wordnet
+ from transformers import AutoConfig, AutoTokenizer
+ from models import BERTLstmCRF
+ from huggingface_hub import hf_hub_download
+
+ checkpoint = "gundruke/bert-lstm-crf-absa"
+ config = AutoConfig.from_pretrained(checkpoint)
+ id2label = config.id2label
+
+ tokenizer = AutoTokenizer.from_pretrained("gundruke/bert-lstm-crf-absa")
+ model = BERTLstmCRF(config)
+
+
+ repo = "gundruke/bert-lstm-crf-absa"
+ filename = "pytorch_model.bin"
+ model.load_state_dict(torch.load(hf_hub_download(repo_id=repo, filename=filename),
+                                  map_location=torch.device('cpu')))
+
+
+ def tokenize_text(text):
+     tokens = tokenizer.tokenize(text)
+     tokenized_text = tokenizer(text)
+
+     return tokens, tokenized_text
+
+
+ def convert_to_multilabel(label_list):
+     multilabel = []
+     if "B-POS" in label_list or "I-POS" in label_list:
+         multilabel.append("Positive")
+     if "B-NEG" in label_list or "I-NEG" in label_list:
+         multilabel.append("Negative")
+     if "B-NEU" in label_list or "I-NEU" in label_list:
+         multilabel.append("Neutral")
+
+     return " and ".join(multilabel)
+
+
+ def classify_word(word, dictionary):
+     synsets = wordnet.synsets(word)
+     if synsets:
+         hypernyms = synsets[0].hypernyms()  # Get the hypernym of the first synset
+         if hypernyms:
+             nltk_result = hypernyms[0].lemmas()[0].name()
+         else:
+             nltk_result = "Unknown"
+     else:
+         nltk_result = "Unknown"
+
+     if word in dictionary:
+         result = dictionary[word]
+     elif nltk_result in ['atmosphere', 'drinks', 'food', 'price', 'service']:
+         result = nltk_result
+     else:
+         result = 'other'
+
+     return result, nltk_result
+
+
+ def get_outputs(tokenized_text):
+     input_ids = tokenized_text["input_ids"]
+     token_type_ids = tokenized_text["token_type_ids"]
+     attention_mask = tokenized_text["attention_mask"]
+
+     inputs = {
+         'input_ids': torch.tensor([input_ids]),
+         'token_type_ids': torch.tensor([token_type_ids]),
+         'attention_mask': torch.tensor([attention_mask])
+     }
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     labels = [id2label.get(i) for i in torch.flatten(outputs[1]).tolist()][1:-1]
+
+     return labels
+
+
+ def join_wordpieces(tokens, labels):
+     joined_tokens = []
+
+     for token, label in zip(tokens, labels):
+         if label == "O":
+             label = None
+         if token.startswith("##"):
+             last_token = joined_tokens[-1][0]
+             joined_tokens[-1] = (last_token+token[2:], label)
+         else:
+             joined_tokens.append((token, label))
+
+     return joined_tokens
+
+
+ def get_category(word, dict_file):
+     with open(dict_file, "r") as file:
+         dictionary = json.load(file)
+
+     r, n = classify_word(word, dictionary)
+
+     return r
+
+
+ def text_analysis(text):
+     tokens, tokenized_text = tokenize_text(text)
+     labels = get_outputs(tokenized_text)
+     multilabel = convert_to_multilabel(labels)
+
+     token_tuple = join_wordpieces(tokens, labels)
+     tokenized_text["tokens"] = tokens
+
+     categories = []
+     for tok in token_tuple:
+         if tok[1]:
+             categories.append((tok[0], get_category(tok[0], "dictionary.json")))
+         else:
+             categories.append((tok[0], None))
+
+     return token_tuple, multilabel, categories
+
+
+ theme = gr.themes.Base()
+ with gr.Blocks(theme=theme) as demo:
+     with gr.Column():
+         input_textbox = gr.Textbox(placeholder="Enter sentence here...")
+         btn = gr.Button("Submit", variant="primary")
+
+         btn.click(fn=text_analysis,
+                   inputs=input_textbox,
+                   outputs=[gr.HighlightedText(label="Token labels"),
+                            gr.Label(label="Multilabel classification"),
+                            gr.HighlightedText(label="Category")],
+                   queue=False)
+
+     with gr.Column():
+         examples = [
+             ["I've been coming here as a child and always come back for the taste."],
+             ["The tea is great and all the sweets are homemade."],
+             ["Strong build which really adds to its durability but poor battery life."],
+             ["We loved the recommendation for the wine, and I think the eggplant parmigiana appetizer should become an entree."]
+         ]
+         gr.Examples(examples, input_textbox)
+
+ demo.launch()
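
The label post-processing above is plain Python and can be sanity-checked without loading the model. A minimal illustration of join_wordpieces with hypothetical tokens and labels (not an actual model prediction); note that a merged word keeps the label of its last WordPiece, and "O" becomes None so Gradio leaves it unhighlighted:

    tokens = ["the", "egg", "##plant", "was", "great"]
    labels = ["O", "B-POS", "I-POS", "O", "O"]
    join_wordpieces(tokens, labels)
    # -> [("the", None), ("eggplant", "I-POS"), ("was", None), ("great", None)]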
dictionary.json ADDED
@@ -0,0 +1,899 @@
+ {
+ "afternoon": "other",
+ "alfredo": "food",
+ "alternatives": "other",
+ "amazin": "other",
+ "ambiance": "atmosphere",
+ "ambience": "atmosphere",
+ "anchovy": "food",
+ "and": "other",
+ "apetizers": "food",
+ "appetizer": "food",
+ "appetizers": "food",
+ "apple": "food",
+ "area": "other",
+ "argentine": "food",
+ "array": "other",
+ "artifical": "other",
+ "asian": "food",
+ "asparagus": "food",
+ "assortment": "food",
+ "atmoshere": "atmosphere",
+ "atmosphere": "atmosphere",
+ "attitude": "service",
+ "avocado": "food",
+ "back": "other",
+ "baclava": "food",
+ "baked": "food",
+ "ball": "other",
+ "banana": "food",
+ "bar": "atmosphere",
+ "barley": "food",
+ "bartender": "service",
+ "bartenders": "service",
+ "base": "food",
+ "bathroom": "service",
+ "bbq": "food",
+ "beans": "food",
+ "beef": "food",
+ "beer": "drinks",
+ "beers": "drinks",
+ "beets": "food",
+ "benedict": "food",
+ "bi": "other",
+ "big": "other",
+ "bill": "price",
+ "billed": "price",
+ "bistro": "atmosphere",
+ "black": "food",
+ "blended": "food",
+ "blue": "food",
+ "blueberry": "food",
+ "booth": "atmosphere",
+ "bottle": "drinks",
+ "boutique": "other",
+ "braised": "food",
+ "branzini": "food",
+ "bread": "food",
+ "breads": "food",
+ "breakfast": "food",
+ "brisket": "food",
+ "brulee": "food",
+ "brunch": "food",
+ "buffalo": "food",
+ "burger": "food",
+ "burgers": "food",
+ "burrito": "food",
+ "butter": "food",
+ "by": "other",
+ "caeser": "food",
+ "cajun": "food",
+ "cake": "food",
+ "cakes": "food",
+ "calamari": "food",
+ "calf": "food",
+ "canai": "food",
+ "candlelight": "atmosphere",
+ "carinthia": "food",
+ "carrots": "food",
+ "cart": "other",
+ "casseroles": "food",
+ "casual": "atmosphere",
+ "catfish": "food",
+ "caviar": "food",
+ "chair": "atmosphere",
+ "chairs": "atmosphere",
+ "cheese": "food",
+ "cheeses": "food",
+ "chef": "service",
+ "cherry": "food",
+ "chick": "food",
+ "chicken": "food",
+ "chickens": "food",
+ "chickpea": "food",
+ "chickpeas": "food",
+ "chili": "food",
+ "chillis": "food",
+ "chinese": "food",
+ "chocolate": "food",
+ "choices": "other",
+ "chops": "food",
+ "chorizo": "food",
+ "churrasco": "food",
+ "cinna": "food",
+ "classics": "food",
+ "clientele": "other",
+ "cobb": "food",
+ "cocktail": "drinks",
+ "cocoa": "food",
+ "coconut": "food",
+ "cod": "food",
+ "codfish": "food",
+ "coffee": "drinks",
+ "cold": "food",
+ "concoctions": "drinks",
+ "confitte": "food",
+ "cooked": "food",
+ "cookie": "food",
+ "cookies": "food",
+ "corn": "food",
+ "corner": "other",
+ "cosi": "other",
+ "cost": "price",
+ "counter": "other",
+ "courses": "food",
+ "crab": "food",
+ "crabcakes": "food",
+ "cranberry": "food",
+ "creamy": "food",
+ "creme": "food",
+ "creole": "food",
+ "crepes": "food",
+ "crust": "food",
+ "crusted": "food",
+ "cuccumber": "food",
+ "cuisine": "food",
+ "curried": "food",
+ "curry": "food",
+ "dabs": "food",
+ "dance": "atmosphere",
+ "day": "other",
+ "de": "other",
+ "deco": "atmosphere",
+ "decor": "atmosphere",
+ "decoration": "atmosphere",
+ "delights": "food",
+ "delivary": "service",
+ "delivers": "service",
+ "delivery": "service",
+ "delmonico": "food",
+ "deserts": "food",
+ "design": "atmosphere",
+ "dessert": "food",
+ "desserts": "food",
+ "dill": "food",
+ "dine": "service",
+ "dining": "atmosphere",
+ "dinner": "food",
+ "dinners": "food",
+ "dip": "food",
+ "dipping": "food",
+ "disco": "atmosphere",
+ "dish": "food",
+ "dishes": "food",
+ "dishs": "food",
+ "display": "other",
+ "dog": "food",
+ "dogs": "food",
+ "donut": "food",
+ "downstairs": "other",
+ "dressed": "food",
+ "dressing": "food",
+ "drink": "drinks",
+ "drinks": "drinks",
+ "duck": "food",
+ "dumplings": "food",
+ "duo": "other",
+ "eastern": "food",
+ "eating": "other",
+ "egg": "food",
+ "eggplant": "food",
+ "eggs": "food",
+ "emiliana": "food",
+ "empenadas": "food",
+ "english": "food",
+ "entertainment": "atmosphere",
+ "entree": "food",
+ "entrees": "food",
+ "erbazzone": "food",
+ "escargot": "food",
+ "experience": "atmosphere",
+ "fajita": "food",
+ "falafal": "food",
+ "falafel": "food",
+ "famous": "other",
+ "fare": "food",
+ "female": "other",
+ "fennel": "food",
+ "fettuccine": "food",
+ "fettucino": "food",
+ "filet": "food",
+ "fish": "food",
+ "fixe": "price",
+ "flan": "food",
+ "flavor": "food",
+ "flavored": "food",
+ "flavors": "food",
+ "floor": "atmosphere",
+ "focacchia": "food",
+ "foie": "food",
+ "folding": "other",
+ "food": "food",
+ "foods": "food",
+ "fooood": "food",
+ "for": "other",
+ "fork": "other",
+ "fortune": "other",
+ "french": "food",
+ "fresh": "food",
+ "fried": "food",
+ "fries": "food",
+ "frosty": "food",
+ "fruit": "food",
+ "fusion": "food",
+ "garden": "atmosphere",
+ "garlic": "food",
+ "gelato": "food",
+ "ginger": "food",
+ "glass": "other",
+ "gnocchi": "food",
+ "goat": "food",
+ "gorgonzola": "food",
+ "gosht": "food",
+ "grand": "other",
+ "gras": "food",
+ "gratin": "food",
+ "gratuity": "price",
+ "greek": "food",
+ "green": "food",
+ "greens": "food",
+ "grill": "food",
+ "grilled": "food",
+ "ground": "food",
+ "guacamole": "food",
+ "ham": "food",
+ "hamburger": "food",
+ "happy": "other",
+ "hibiscus": "food",
+ "hint": "other",
+ "homemade": "food",
+ "honey": "food",
+ "hong": "food",
+ "host": "service",
+ "hostess": "service",
+ "hot": "food",
+ "hotdogs": "food",
+ "hour": "other",
+ "humus": "food",
+ "ice": "food",
+ "iced": "drinks",
+ "in": "other",
+ "indian": "food",
+ "ingredients": "food",
+ "interior": "atmosphere",
+ "italian": "food",
+ "items": "other",
+ "jap": "food",
+ "japanese": "food",
+ "jazz": "atmosphere",
+ "jerusalem": "food",
+ "juice": "drinks",
+ "juices": "drinks",
+ "kalmata": "food",
+ "kebabs": "food",
+ "kickers": "food",
+ "kimono": "other",
+ "king": "food",
+ "kitchen": "service",
+ "knots": "food",
+ "kompot": "drinks",
+ "kong": "other",
+ "korean": "food",
+ "lamb": "food",
+ "large": "other",
+ "lasagna": "food",
+ "latkes": "food",
+ "latte": "drinks",
+ "leaves": "food",
+ "lemon": "food",
+ "lemonade": "drinks",
+ "lettuce": "food",
+ "li": "other",
+ "life": "other",
+ "light": "atmosphere",
+ "lime": "food",
+ "linguini": "food",
+ "lobster": "food",
+ "location": "atmosphere",
+ "lomo": "food",
+ "long": "other",
+ "lovely": "atmosphere",
+ "low": "other",
+ "lunch": "food",
+ "lychee": "food",
+ "madison": "food",
+ "main": "food",
+ "make": "other",
+ "maki": "food",
+ "mango": "food",
+ "margherita": "food",
+ "margarita": "drinks",
+ "martini": "drinks",
+ "martinis": "drinks",
+ "masala": "food",
+ "mashed": "food",
+ "massaman": "food",
+ "matzo": "food",
+ "meal": "food",
+ "meat": "food",
+ "meatballs": "food",
+ "mediterranean": "food",
+ "melon": "food",
+ "menu": "food",
+ "meringue": "food",
+ "met": "other",
+ "microbrews": "drinks",
+ "midtown": "other",
+ "milk": "food",
+ "mimosa": "drinks",
+ "minestrone": "food",
+ "mixed": "food",
+ "mojito": "drinks",
+ "monkfish": "food",
+ "more": "other",
+ "mousse": "food",
+ "muffin": "food",
+ "muffins": "food",
+ "mushroom": "food",
+ "mushrooms": "food",
+ "music": "atmosphere",
+ "musical": "atmosphere",
+ "mustard": "food",
+ "nasi": "food",
+ "natural": "atmosphere",
+ "noodles": "food",
+ "north": "other",
+ "nova": "food",
+ "oatmeal": "food",
+ "oil": "food",
+ "olives": "food",
+ "omelette": "food",
+ "onion": "food",
+ "open": "other",
+ "opener": "other",
+ "option": "other",
+ "options": "other",
+ "orange": "food",
+ "organic": "food",
+ "out": "other",
+ "outside": "other",
+ "over": "other",
+ "paella": "food",
+ "pan": "food",
+ "pancake": "food",
+ "pancakes": "food",
+ "parfait": "food",
+ "pasta": "food",
+ "pastries": "food",
+ "patties": "food",
+ "peanut": "food",
+ "pear": "food",
+ "pears": "food",
+ "pecan": "food",
+ "peking": "food",
+ "pepperoni": "food",
+ "persian": "food",
+ "pesto": "food",
+ "phad": "food",
+ "philly": "food",
+ "pho": "food",
+ "pia": "food",
+ "pie": "food",
+ "pierogies": "food",
+ "pierogi": "food",
+ "pies": "food",
+ "pigeon": "food",
+ "pita": "food",
+ "pizza": "food",
+ "place": "atmosphere",
+ "platters": "food",
+ "plate": "food",
+ "plates": "food",
+ "pocket": "food",
+ "pomegranate": "food",
+ "pop": "food",
+ "pops": "food",
+ "popular": "other",
+ "porc": "food",
+ "pork": "food",
+ "pot": "food",
+ "potato": "food",
+ "potatoes": "food",
+ "prawns": "food",
+ "prix": "price",
+ "prosciutto": "food",
+ "prosecco": "drinks",
+ "protein": "food",
+ "pub": "atmosphere",
+ "puff": "food",
+ "puffs": "food",
+ "pumpkin": "food",
+ "quail": "food",
+ "quartino": "drinks",
+ "quick": "service",
+ "quiche": "food",
+ "quinoa": "food",
+ "rack": "food",
+ "radish": "food",
+ "ramp": "food",
+ "ramyeon": "food",
+ "ravioli": "food",
+ "raw": "food",
+ "razor": "food",
+ "red": "food",
+ "refreshing": "atmosphere",
+ "restaurant": "other",
+ "restauraunt": "other",
+ "restaurantthe": "other",
+ "restaurants": "other",
+ "resturant": "other",
+ "roast": "food",
+ "roasted": "food",
+ "roll": "food",
+ "rolls": "food",
+ "romaine": "food",
+ "room": "atmosphere",
+ "root": "food",
+ "rose": "drinks",
+ "rotisserie": "food",
+ "rueben": "food",
+ "rum": "drinks",
+ "rump": "food",
+ "saganaki": "food",
+ "salad": "food",
+ "salads": "food",
+ "salami": "food",
+ "salmon": "food",
+ "sandwich": "food",
+ "sandwiches": "food",
+ "sangria": "drinks",
+ "sauce": "food",
+ "sauces": "food",
+ "sausage": "food",
+ "savory": "food",
+ "scallops": "food",
+ "schnitzel": "food",
+ "seasonal": "food",
+ "seaweed": "food",
+ "selection": "food",
+ "service": "service",
+ "services": "service",
+ "set": "other",
+ "shake": "drinks",
+ "shakes": "drinks",
+ "shakshuka": "food",
+ "shawarma": "food",
+ "shellfish": "food",
+ "sherry": "drinks",
+ "shiitake": "food",
+ "short": "other",
+ "shot": "drinks",
+ "shots": "drinks",
+ "shrimp": "food",
+ "side": "food",
+ "sides": "food",
+ "siu": "food",
+ "sliced": "food",
+ "sliders": "food",
+ "smoked": "food",
+ "smoothie": "drinks",
+ "smoothies": "drinks",
+ "soba": "food",
+ "soft": "other",
+ "soju": "drinks",
+ "soup": "food",
+ "soups": "food",
+ "south": "other",
+ "southern": "food",
+ "soya": "food",
+ "spanish": "food",
+ "sparkling": "drinks",
+ "special": "other",
+ "specials": "other",
+ "spice": "food",
+ "spicy": "food",
+ "spinach": "food",
+ "spoons": "other",
+ "spritz": "drinks",
+ "squash": "food",
+ "squeezed": "other",
+ "sriracha": "food",
+ "st": "other",
+ "stadium": "atmosphere",
+ "steak": "food",
+ "steaks": "food",
+ "stew": "food",
+ "sticks": "food",
+ "stir": "food",
+ "stix": "food",
+ "stone": "other",
+ "strawberry": "food",
+ "strudel": "food",
+ "style": "other",
+ "sugarcane": "food",
+ "sugarfish": "food",
+ "sukiyaki": "food",
+ "sundae": "food",
+ "sundays": "food",
+ "super": "other",
+ "sushi": "food",
+ "swedish": "food",
+ "sweet": "food",
+ "sweetbread": "food",
+ "sweetbreads": "food",
+ "sweets": "food",
+ "swiss": "food",
+ "swordfish": "food",
+ "szechuan": "food",
+ "table": "other",
+ "taco": "food",
+ "tacos": "food",
+ "tahini": "food",
+ "takeout": "service",
+ "tapas": "food",
+ "tart": "food",
+ "tartare": "food",
+ "tartufo": "food",
+ "tea": "drinks",
+ "teapot": "drinks",
+ "teas": "drinks",
+ "tempura": "food",
+ "tenderloin": "food",
+ "teriyaki": "food",
+ "thai": "food",
+ "thali": "food",
+ "the": "other",
+ "thee": "other",
+ "then": "other",
+ "thin": "other",
+ "thursday": "other",
+ "tikka": "food",
+ "to": "other",
+ "toast": "food",
+ "toasts": "food",
+ "tofu": "food",
+ "toffee": "food",
+ "tom": "food",
+ "tomyum": "food",
+ "tongue": "food",
+ "tonkatsu": "food",
+ "tony": "food",
+ "top": "other",
+ "topping": "food",
+ "toppings": "food",
+ "toro": "food",
+ "torte": "food",
+ "tortilla": "food",
+ "tortillas": "food",
+ "tortoise": "food",
+ "truffle": "food",
+ "truffles": "food",
+ "tuna": "food",
+ "turkey": "food",
+ "turkish": "food",
+ "turmeric": "food",
+ "turnip": "food",
+ "tuscan": "food",
+ "twist": "other",
+ "udon": "food",
+ "umami": "food",
+ "unagi": "food",
+ "union": "food",
+ "up": "other",
+ "upbeat": "atmosphere",
+ "upside": "other",
+ "urchin": "food",
+ "us": "other",
+ "uzbek": "food",
+ "vadai": "food",
+ "veal": "food",
+ "vegan": "food",
+ "vegetable": "food",
+ "vegetables": "food",
+ "vegetarian": "food",
+ "venison": "food",
+ "vermicelli": "food",
+ "vermouth": "drinks",
+ "vietnamese": "food",
+ "vindaloo": "food",
+ "vinegar": "food",
+ "vodka": "drinks",
+ "vol": "other",
+ "waffle": "food",
+ "waffles": "food",
+ "wagyu": "food",
+ "warm": "atmosphere",
+ "wasabi": "food",
+ "water": "drinks",
+ "watermelon": "food",
+ "wednesday": "other",
+ "weekend": "other",
+ "weekends": "other",
+ "weight": "other",
+ "wheat": "food",
+ "whiskey": "drinks",
+ "white": "food",
+ "whole": "food",
+ "wine": "drinks",
+ "wines": "drinks",
+ "wing": "food",
+ "wings": "food",
+ "winter": "other",
+ "with": "other",
+ "wok": "food",
+ "wonton": "food",
+ "wrap": "food",
+ "wraps": "food",
+ "xiao": "food",
+ "yakitori": "food",
+ "yam": "food",
+ "yellow": "food",
+ "yogurt": "food",
+ "york": "other",
+ "yorkshire": "food",
+ "yuzu": "food",
+ "zealand": "other",
+ "zucchini": "food",
+ "fontina": "food",
+ "staples": "other",
+ "ceasar": "food",
+ "octopus": "food",
+ "dough": "food",
+ "candle": "atmosphere",
+ "ricotta": "food",
+ "tac": "food",
+ "scoop": "food",
+ "employees": "service",
+ "sea": "food",
+ "tramezzini": "food",
+ "appreciated": "other",
+ "collapse": "other",
+ "negimaki": "food",
+ "napoleon": "food",
+ "beverage": "drinks",
+ "tip": "service",
+ "pleasure": "other",
+ "dhosas": "food",
+ "parmasean": "food",
+ "broiled": "food",
+ "stuff": "food",
+ "earthy": "other",
+ "frites": "food",
+ "hawaiian": "food",
+ "tamales": "food",
+ "cluding": "other",
+ "order": "service",
+ "sardines": "food",
+ "skin": "food",
+ "cigar": "atmosphere",
+ "district": "other",
+ "joint": "other",
+ "pinot": "drinks",
+ "barbecued": "food",
+ "dim": "atmosphere",
+ "polenta": "food",
+ "eateries": "other",
+ "terrine": "food",
+ "slice": "food",
+ "busboy": "service",
+ "scallop": "food",
+ "lobby": "atmosphere",
+ "seviche": "food",
+ "mirrors": "atmosphere",
+ "bakery": "food",
+ "rasberry": "food",
+ "frozen": "food",
+ "serving": "service",
+ "brasserie": "food",
+ "role": "other",
+ "category": "other",
+ "balls": "food",
+ "pepper": "food",
+ "range": "other",
+ "course": "food",
+ "lentil": "food",
+ "beverages": "drinks",
+ "noise": "atmosphere",
+ "app": "other",
+ "nachos": "food",
+ "seasoning": "food",
+ "kobe": "food",
+ "bagels": "food",
+ "varieties": "other",
+ "suace": "food",
+ "appropriately": "other",
+ "appys": "food",
+ "abijah": "other",
+ "exotic": "food",
+ "dj": "atmosphere",
+ "olive": "food",
+ "citrus": "food",
+ "country": "other",
+ "establishment": "other",
+ "decker": "other",
+ "banquet": "other",
+ "chard": "food",
+ "smoky": "other",
+ "sandwhich": "food",
+ "soupy": "food",
+ "walls": "atmosphere",
+ "california": "other",
+ "edamame": "food",
+ "resting": "other",
+ "tomatoes": "food",
+ "cooks": "service",
+ "chai": "drinks",
+ "glasses": "other",
+ "onions": "food",
+ "gold": "other",
+ "tortelli": "food",
+ "bloom": "other",
+ "closed": "other",
+ "bowl": "food",
+ "nostalgia": "other",
+ "scrambled": "food",
+ "time": "other",
+ "triple": "other",
+ "papaya": "food",
+ "busboys": "service",
+ "single": "other",
+ "msg": "food",
+ "concoction": "food",
+ "calves": "food",
+ "captain": "service",
+ "mint": "food",
+ "detail": "other",
+ "champagne": "drinks",
+ "chop": "food",
+ "bacon": "food",
+ "cooking": "other",
+ "dulce": "food",
+ "un": "other",
+ "candles": "atmosphere",
+ "door": "other",
+ "drunken": "food",
+ "guac": "food",
+ "eel": "food",
+ "slicked": "other",
+ "dumpling": "food",
+ "broth": "food",
+ "otoro": "food",
+ "secret": "other",
+ "2": "other",
+ "moules": "food",
+ "cup": "drinks",
+ "full": "other",
+ "yams": "food",
+ "answer": "other",
+ "skewers": "food",
+ "lox": "food",
+ "halibut": "food",
+ "enough": "other",
+ "sheet": "other",
+ "mexican": "food",
+ "fry": "food",
+ "bathrooms": "other",
+ "game": "food",
+ "darling": "other",
+ "jelly": "food",
+ "local": "other",
+ "piano": "atmosphere",
+ "cosmos": "drinks",
+ "filo": "food",
+ "polish": "food",
+ "seating": "atmosphere",
+ "ceviche": "food",
+ "cannoli": "food",
+ "versatile": "other",
+ "brick": "other",
+ "decore": "other",
+ "oven": "other",
+ "toe": "other",
+ "deluxe": "other",
+ "clientelle": "other",
+ "terrace": "atmosphere",
+ "salsa": "food",
+ "hummus": "food",
+ "attitudes": "service",
+ "color": "other",
+ "leche": "food",
+ "beats": "other",
+ "furnishings": "atmosphere",
+ "spread": "food",
+ "peppers": "food",
+ "coat": "other",
+ "whisper": "atmosphere",
+ "chole": "food",
+ "presentation": "other",
+ "deep": "other",
+ "desert": "food",
+ "cream": "food",
+ "buffet": "food",
+ "frisee": "food",
+ "speck": "food",
+ "diners": "service",
+ "individual": "other",
+ "front": "other",
+ "environment": "atmosphere",
+ "beet": "food",
+ "spring": "other",
+ "marina": "other",
+ "marinated": "food",
+ "tabs": "other",
+ "sardinian": "food",
+ "check": "other",
+ "squid": "food",
+ "bass": "food",
+ "clams": "food",
+ "beginning": "other",
+ "sinatra": "other",
+ "diner": "food",
+ "tic": "other",
+ "backyard": "other",
+ "tomato": "food",
+ "steamed": "food",
+ "per": "other",
+ "breast": "food",
+ "chips": "food",
+ "brown": "other",
+ "sommelier": "service",
+ "servings": "food",
+ "pineapple": "food",
+ "shirted": "other",
+ "oysters": "food",
+ "salt": "food",
+ "fragrant": "other",
+ "dhal": "food",
+ "pleasures": "other",
+ "seat": "service",
+ "appetites": "food",
+ "stained": "other",
+ "samosas": "food",
+ "ceiling": "atmosphere",
+ "escabeche": "food",
+ "crowds": "atmosphere",
+ "club": "other",
+ "bruschetta": "food",
+ "family": "other",
+ "poached": "food",
+ "crew": "service",
+ "temperature": "other",
+ "influence": "other",
+ "plantains": "food",
+ "suggestion": "other",
+ "pico": "food",
+ "bagel": "food",
+ "melt": "food",
+ "bountiful": "other",
+ "drop": "other",
+ "maitre": "service",
+ "artworks": "atmosphere",
+ "sicilian": "food",
+ "alternative": "other",
+ "spot": "other",
+ "kaiseki": "food",
+ "pompous": "other",
+ "comfort": "other",
+ "american": "food",
+ "tap": "drinks",
+ "ribbon": "other",
+ "guy": "other",
+ "customer": "service",
+ "kumquat": "food",
+ "mix": "other",
+ "brioche": "food",
+ "souffle": "food",
+ "knife": "other",
+ "soda": "drinks",
+ "nelson": "other",
+ "faced": "other",
+ "sum": "other",
+ "crowd": "atmosphere",
+ "summer": "other",
+ "holiday": "other",
+ "freaking": "other",
+ "waiter": "service",
+ "waitress": "service",
+ "manager": "service",
+ "servers": "service"
+ }
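
This file is the word-to-aspect lookup consumed by classify_word/get_category in app.py. A quick sketch of how it is used (the two lookups below are entries from the mapping above):

    import json

    with open("dictionary.json") as f:
        aspect_dict = json.load(f)

    aspect_dict["tea"]     # "drinks"
    aspect_dict["waiter"]  # "service"
    # Words missing from the dictionary fall back to the WordNet hypernym check in
    # classify_word, and anything still unresolved is bucketed as "other".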
models/BERT_LSTM_CRF.py ADDED
@@ -0,0 +1,78 @@
+ #from transformers import BertPreTrainedModel, BertForSequenceClassification, BertModel
+ from transformers import AutoModel, PreTrainedModel
+ from transformers.modeling_outputs import TokenClassifierOutput
+ from torch import nn
+ from torch.nn import CrossEntropyLoss
+ import torch
+ from .layers import CRF
+ from itertools import islice
+
+ NUM_PER_LAYER = 16
+
+ class BERTLstmCRF(PreTrainedModel):
+     _keys_to_ignore_on_load_unexpected = [r"pooler"]
+
+     def __init__(self, config):
+         super().__init__(config)
+         print(config)
+         self.num_labels = config.num_labels
+         self.bert = AutoModel.from_pretrained(config._name_or_path, config=config, add_pooling_layer=False)
+         classifier_dropout = (config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob)
+         self.dropout = nn.Dropout(config.hidden_dropout_prob)
+         self.bilstm = nn.LSTM(config.hidden_size, (config.hidden_size) // 2, batch_first=True, bidirectional=True)
+         self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+         self.crf = CRF(num_tags=config.num_labels, batch_first=True)
+
+         if self.config.freeze == True:
+             self.manage_freezing()
+
+         #self.bert.init_weights() # load pretrained weights
+
+     def manage_freezing(self):
+         for _, param in self.bert.embeddings.named_parameters():
+             param.requires_grad = False
+
+         num_encoders_to_freeze = self.config.num_frozen_encoder
+         if num_encoders_to_freeze > 0:
+             for _, param in islice(self.bert.encoder.named_parameters(), num_encoders_to_freeze*NUM_PER_LAYER):
+                 param.requires_grad = False
+
+
+     def forward(self,
+                 input_ids=None,
+                 attention_mask=None,
+                 token_type_ids=None,
+                 position_ids=None,
+                 head_mask=None,
+                 inputs_embeds=None,
+                 labels=None,
+                 output_attentions=None,
+                 output_hidden_states=None,
+                 return_dict=None
+                 ):
+         # Default `model.config.use_return_dict` is `True`
+         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+         outputs = self.bert(input_ids,
+                             attention_mask=attention_mask,
+                             token_type_ids=token_type_ids,
+                             position_ids=position_ids,
+                             head_mask=head_mask,
+                             inputs_embeds=inputs_embeds,
+                             output_attentions=output_attentions,
+                             output_hidden_states=output_hidden_states,
+                             return_dict=return_dict)
+
+         sequence_output = outputs[0]
+         sequence_output = self.dropout(sequence_output)
+         lstm_output, hc = self.bilstm(sequence_output)
+         logits = self.classifier(lstm_output)
+
+         loss = None
+         if labels is not None:
+             # Only computed during training/evaluation; labels are not passed at inference
+             loss = -1 * self.crf(logits, labels)
+
+         tags = torch.Tensor(self.crf.decode(logits))
+
+         return loss, tags
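
A note on NUM_PER_LAYER = 16 in manage_freezing: each BERT encoder layer exposes 16 parameter tensors (query/key/value weights and biases, the attention-output dense layer and its LayerNorm, the intermediate dense layer, and the output dense layer and its LayerNorm), so slicing the first n * 16 entries of encoder.named_parameters() freezes the first n layers. A quick check, assuming bert-base-uncased is available locally:

    from transformers import AutoModel

    bert = AutoModel.from_pretrained("bert-base-uncased")
    len(list(bert.encoder.layer[0].named_parameters()))  # -> 16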
models/__init__.py ADDED
@@ -0,0 +1 @@
+ from .BERT_LSTM_CRF import BERTLstmCRF
models/layers/CRF.py ADDED
@@ -0,0 +1,353 @@
+ # Taken from https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py and fixed the uint8 warning
+ __version__ = '0.7.2'
+
+ from typing import List, Optional
+
+ import torch
+ import torch.nn as nn
+
+ LARGE_NEGATIVE_NUMBER = -1e9
+
+ class CRF(nn.Module):
+     """Conditional random field.
+     This module implements a conditional random field [LMP01]_. The forward computation
+     of this class computes the log likelihood of the given sequence of tags and
+     emission score tensor. This class also has `~CRF.decode` method which finds
+     the best tag sequence given an emission score tensor using `Viterbi algorithm`_.
+     Args:
+         num_tags: Number of tags.
+         batch_first: Whether the first dimension corresponds to the size of a minibatch.
+     Attributes:
+         start_transitions (`~torch.nn.Parameter`): Start transition score tensor of size
+             ``(num_tags,)``.
+         end_transitions (`~torch.nn.Parameter`): End transition score tensor of size
+             ``(num_tags,)``.
+         transitions (`~torch.nn.Parameter`): Transition score tensor of size
+             ``(num_tags, num_tags)``.
+     .. [LMP01] Lafferty, J., McCallum, A., Pereira, F. (2001).
+        "Conditional random fields: Probabilistic models for segmenting and
+        labeling sequence data". *Proc. 18th International Conf. on Machine
+        Learning*. Morgan Kaufmann. pp. 282–289.
+     .. _Viterbi algorithm: https://en.wikipedia.org/wiki/Viterbi_algorithm
+     """
+
+     def __init__(self, num_tags: int, batch_first: bool = False) -> None:
+         if num_tags <= 0:
+             raise ValueError(f'invalid number of tags: {num_tags}')
+         super().__init__()
+         self.num_tags = num_tags
+         self.batch_first = batch_first
+         self.start_transitions = nn.Parameter(torch.empty(num_tags))
+         self.end_transitions = nn.Parameter(torch.empty(num_tags))
+         self.transitions = nn.Parameter(torch.empty(num_tags, num_tags))
+
+         self.reset_parameters()
+         self.mask_impossible_transitions()
+
+     def reset_parameters(self) -> None:
+         """Initialize the transition parameters.
+         The parameters will be initialized randomly from a uniform distribution
+         between -0.1 and 0.1.
+         """
+         nn.init.uniform_(self.start_transitions, -0.1, 0.1)
+         nn.init.uniform_(self.end_transitions, -0.1, 0.1)
+         nn.init.uniform_(self.transitions, -0.1, 0.1)
+
+     def mask_impossible_transitions(self) -> None:
+         """Set the value of impossible transitions to LARGE_NEGATIVE_NUMBER
+         - start transition value of I-X
+         - transition score of O -> I
+         """
+         with torch.no_grad():
+             self.start_transitions[2] = LARGE_NEGATIVE_NUMBER
+             self.start_transitions[4] = LARGE_NEGATIVE_NUMBER
+             self.start_transitions[6] = LARGE_NEGATIVE_NUMBER
+
+             self.transitions[0][2] = LARGE_NEGATIVE_NUMBER
+             self.transitions[0][4] = LARGE_NEGATIVE_NUMBER
+             self.transitions[0][6] = LARGE_NEGATIVE_NUMBER
+             self.transitions[1][4] = LARGE_NEGATIVE_NUMBER
+             self.transitions[1][6] = LARGE_NEGATIVE_NUMBER
+             self.transitions[2][4] = LARGE_NEGATIVE_NUMBER
+             self.transitions[2][6] = LARGE_NEGATIVE_NUMBER
+             self.transitions[3][2] = LARGE_NEGATIVE_NUMBER
+             self.transitions[3][6] = LARGE_NEGATIVE_NUMBER
+             self.transitions[4][2] = LARGE_NEGATIVE_NUMBER
+             self.transitions[4][6] = LARGE_NEGATIVE_NUMBER
+             self.transitions[5][2] = LARGE_NEGATIVE_NUMBER
+             self.transitions[5][4] = LARGE_NEGATIVE_NUMBER
+             self.transitions[6][2] = LARGE_NEGATIVE_NUMBER
+             self.transitions[6][4] = LARGE_NEGATIVE_NUMBER
+
+     def __repr__(self) -> str:
+         return f'{self.__class__.__name__}(num_tags={self.num_tags})'
+
+     def forward(
+             self,
+             emissions: torch.Tensor,
+             tags: torch.LongTensor,
+             mask: Optional[torch.ByteTensor] = None,
+             reduction: str = 'sum',
+     ) -> torch.Tensor:
+         """Compute the conditional log likelihood of a sequence of tags given emission scores.
+         Args:
+             emissions (`~torch.Tensor`): Emission score tensor of size
+                 ``(seq_length, batch_size, num_tags)`` if ``batch_first`` is ``False``,
+                 ``(batch_size, seq_length, num_tags)`` otherwise.
+             tags (`~torch.LongTensor`): Sequence of tags tensor of size
+                 ``(seq_length, batch_size)`` if ``batch_first`` is ``False``,
+                 ``(batch_size, seq_length)`` otherwise.
+             mask (`~torch.ByteTensor`): Mask tensor of size ``(seq_length, batch_size)``
+                 if ``batch_first`` is ``False``, ``(batch_size, seq_length)`` otherwise.
+             reduction: Specifies the reduction to apply to the output:
+                 ``none|sum|mean|token_mean``. ``none``: no reduction will be applied.
+                 ``sum``: the output will be summed over batches. ``mean``: the output will be
+                 averaged over batches. ``token_mean``: the output will be averaged over tokens.
+         Returns:
+             `~torch.Tensor`: The log likelihood. This will have size ``(batch_size,)`` if
+             reduction is ``none``, ``()`` otherwise.
+         """
+         #self.mask_impossible_transitions()
+         self._validate(emissions, tags=tags, mask=mask)
+         if reduction not in ('none', 'sum', 'mean', 'token_mean'):
+             raise ValueError(f'invalid reduction: {reduction}')
+         if mask is None:
+             mask = torch.ones_like(tags, dtype=torch.uint8)
+
+         if self.batch_first:
+             emissions = emissions.transpose(0, 1)
+             tags = tags.transpose(0, 1)
+             mask = mask.transpose(0, 1)
+
+         # shape: (batch_size,)
+         numerator = self._compute_score(emissions, tags, mask)
+         # shape: (batch_size,)
+         denominator = self._compute_normalizer(emissions, mask)
+         # shape: (batch_size,)
+         llh = numerator - denominator
+
+         if reduction == 'none':
+             return llh
+         if reduction == 'sum':
+             return llh.sum()
+         if reduction == 'mean':
+             return llh.mean()
+         assert reduction == 'token_mean'
+         return llh.sum() / mask.type_as(emissions).sum()
+
+     def decode(self, emissions: torch.Tensor,
+                mask: Optional[torch.ByteTensor] = None) -> List[List[int]]:
+         """Find the most likely tag sequence using Viterbi algorithm.
+         Args:
+             emissions (`~torch.Tensor`): Emission score tensor of size
+                 ``(seq_length, batch_size, num_tags)`` if ``batch_first`` is ``False``,
+                 ``(batch_size, seq_length, num_tags)`` otherwise.
+             mask (`~torch.ByteTensor`): Mask tensor of size ``(seq_length, batch_size)``
+                 if ``batch_first`` is ``False``, ``(batch_size, seq_length)`` otherwise.
+         Returns:
+             List of list containing the best tag sequence for each batch.
+         """
+         self._validate(emissions, mask=mask)
+         if mask is None:
+             mask = emissions.new_ones(emissions.shape[:2], dtype=torch.uint8)
+
+         if self.batch_first:
+             emissions = emissions.transpose(0, 1)
+             mask = mask.transpose(0, 1)
+
+         return self._viterbi_decode(emissions, mask)
+
+     def _validate(
+             self,
+             emissions: torch.Tensor,
+             tags: Optional[torch.LongTensor] = None,
+             mask: Optional[torch.ByteTensor] = None) -> None:
+         if emissions.dim() != 3:
+             raise ValueError(f'emissions must have dimension of 3, got {emissions.dim()}')
+         if emissions.size(2) != self.num_tags:
+             raise ValueError(
+                 f'expected last dimension of emissions is {self.num_tags}, '
+                 f'got {emissions.size(2)}')
+
+         if tags is not None:
+             if emissions.shape[:2] != tags.shape:
+                 raise ValueError(
+                     'the first two dimensions of emissions and tags must match, '
+                     f'got {tuple(emissions.shape[:2])} and {tuple(tags.shape)}')
+
+         if mask is not None:
+             if emissions.shape[:2] != mask.shape:
+                 raise ValueError(
+                     'the first two dimensions of emissions and mask must match, '
+                     f'got {tuple(emissions.shape[:2])} and {tuple(mask.shape)}')
+             no_empty_seq = not self.batch_first and mask[0].all()
+             no_empty_seq_bf = self.batch_first and mask[:, 0].all()
+             if not no_empty_seq and not no_empty_seq_bf:
+                 raise ValueError('mask of the first timestep must all be on')
+
+     def _compute_score(
+             self, emissions: torch.Tensor, tags: torch.LongTensor,
+             mask: torch.ByteTensor) -> torch.Tensor:
+         # emissions: (seq_length, batch_size, num_tags)
+         # tags: (seq_length, batch_size)
+         # mask: (seq_length, batch_size)
+         assert emissions.dim() == 3 and tags.dim() == 2
+         assert emissions.shape[:2] == tags.shape
+         assert emissions.size(2) == self.num_tags
+         assert mask.shape == tags.shape
+         assert mask[0].all()
+
+         seq_length, batch_size = tags.shape
+         mask = mask.type_as(emissions)
+
+         # Start transition score and first emission
+         # shape: (batch_size,)
+         score = self.start_transitions[tags[0]]
+         score += emissions[0, torch.arange(batch_size), tags[0]]
+
+         for i in range(1, seq_length):
+             # Transition score to next tag, only added if next timestep is valid (mask == 1)
+             # shape: (batch_size,)
+             score += self.transitions[tags[i - 1], tags[i]] * mask[i]
+
+             # Emission score for next tag, only added if next timestep is valid (mask == 1)
+             # shape: (batch_size,)
+             score += emissions[i, torch.arange(batch_size), tags[i]] * mask[i]
+
+         # End transition score
+         # shape: (batch_size,)
+         seq_ends = mask.long().sum(dim=0) - 1
+         # shape: (batch_size,)
+         last_tags = tags[seq_ends, torch.arange(batch_size)]
+         # shape: (batch_size,)
+         score += self.end_transitions[last_tags]
+
+         return score
+
+     def _compute_normalizer(
+             self, emissions: torch.Tensor, mask: torch.ByteTensor) -> torch.Tensor:
+         # emissions: (seq_length, batch_size, num_tags)
+         # mask: (seq_length, batch_size)
+         assert emissions.dim() == 3 and mask.dim() == 2
+         assert emissions.shape[:2] == mask.shape
+         assert emissions.size(2) == self.num_tags
+         assert mask[0].all()
+
+         seq_length = emissions.size(0)
+
+         # Start transition score and first emission; score has size of
+         # (batch_size, num_tags) where for each batch, the j-th column stores
+         # the score that the first timestep has tag j
+         # shape: (batch_size, num_tags)
+         score = self.start_transitions + emissions[0]
+
+         for i in range(1, seq_length):
+             # Broadcast score for every possible next tag
+             # shape: (batch_size, num_tags, 1)
+             broadcast_score = score.unsqueeze(2)
+
+             # Broadcast emission score for every possible current tag
+             # shape: (batch_size, 1, num_tags)
+             broadcast_emissions = emissions[i].unsqueeze(1)
+
+             # Compute the score tensor of size (batch_size, num_tags, num_tags) where
+             # for each sample, entry at row i and column j stores the sum of scores of all
+             # possible tag sequences so far that end with transitioning from tag i to tag j
+             # and emitting
+             # shape: (batch_size, num_tags, num_tags)
+             next_score = broadcast_score + self.transitions + broadcast_emissions
+
+             # Sum over all possible current tags, but we're in score space, so a sum
+             # becomes a log-sum-exp: for each sample, entry i stores the sum of scores of
+             # all possible tag sequences so far, that end in tag i
+             # shape: (batch_size, num_tags)
+             next_score = torch.logsumexp(next_score, dim=1)
+
+             # Set score to the next score if this timestep is valid (mask == 1)
+             # shape: (batch_size, num_tags)
+             score = torch.where(mask[i].unsqueeze(1).bool(), next_score, score)
+
+         # End transition score
+         # shape: (batch_size, num_tags)
+         score += self.end_transitions
+
+         # Sum (log-sum-exp) over all possible tags
+         # shape: (batch_size,)
+         return torch.logsumexp(score, dim=1)
+
+     def _viterbi_decode(self, emissions: torch.FloatTensor,
+                         mask: torch.ByteTensor) -> List[List[int]]:
+         # emissions: (seq_length, batch_size, num_tags)
+         # mask: (seq_length, batch_size)
+         assert emissions.dim() == 3 and mask.dim() == 2
+         assert emissions.shape[:2] == mask.shape
+         assert emissions.size(2) == self.num_tags
+         assert mask[0].all()
+
+         seq_length, batch_size = mask.shape
+
+         # Start transition and first emission
+         # shape: (batch_size, num_tags)
+         score = self.start_transitions + emissions[0]
+         history = []
+
+         # score is a tensor of size (batch_size, num_tags) where for every batch,
+         # value at column j stores the score of the best tag sequence so far that ends
+         # with tag j
+         # history saves where the best tags candidate transitioned from; this is used
+         # when we trace back the best tag sequence
+
+         # Viterbi algorithm recursive case: we compute the score of the best tag sequence
+         # for every possible next tag
+         for i in range(1, seq_length):
+             # Broadcast viterbi score for every possible next tag
+             # shape: (batch_size, num_tags, 1)
+             broadcast_score = score.unsqueeze(2)
+
+             # Broadcast emission score for every possible current tag
+             # shape: (batch_size, 1, num_tags)
+             broadcast_emission = emissions[i].unsqueeze(1)
+
+             # Compute the score tensor of size (batch_size, num_tags, num_tags) where
+             # for each sample, entry at row i and column j stores the score of the best
+             # tag sequence so far that ends with transitioning from tag i to tag j and emitting
+             # shape: (batch_size, num_tags, num_tags)
+             next_score = broadcast_score + self.transitions + broadcast_emission
+
+             # Find the maximum score over all possible current tag
+             # shape: (batch_size, num_tags)
+             next_score, indices = next_score.max(dim=1)
+
+             # Set score to the next score if this timestep is valid (mask == 1)
+             # and save the index that produces the next score
+             # shape: (batch_size, num_tags)
+             score = torch.where(mask[i].unsqueeze(1).bool(), next_score, score)
+             history.append(indices)
+
+         # End transition score
+         # shape: (batch_size, num_tags)
+         score += self.end_transitions
+
+         # Now, compute the best path for each sample
+
+         # shape: (batch_size,)
+         seq_ends = mask.long().sum(dim=0) - 1
+         best_tags_list = []
+
+         for idx in range(batch_size):
+             # Find the tag which maximizes the score at the last timestep; this is our best tag
+             # for the last timestep
+             _, best_last_tag = score[idx].max(dim=0)
+             best_tags = [best_last_tag.item()]
+
+             # We trace back where the best last tag comes from, append that to our best tag
+             # sequence, and trace it back again, and so on
+             for hist in reversed(history[:seq_ends[idx]]):
+                 best_last_tag = hist[idx][best_tags[-1]]
+                 best_tags.append(best_last_tag.item())
+
+             # Reverse the order because we start from the last timestep
+             best_tags.reverse()
+             best_tags_list.append(best_tags)
+
+         return best_tags_list
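
A standalone sanity check of this layer, with random scores and seven tags as in this repo's O/B-POS/I-POS/B-NEG/I-NEG/B-NEU/I-NEU scheme (the numbers are illustrative only): forward returns the summed log likelihood, which BERTLstmCRF negates as its loss, and decode runs Viterbi to get the best tag-id sequence per sample.

    import torch
    from models.layers import CRF

    crf = CRF(num_tags=7, batch_first=True)
    emissions = torch.randn(2, 5, 7)     # (batch, seq_len, num_tags)
    tags = torch.randint(0, 7, (2, 5))   # gold tag ids

    loss = -crf(emissions, tags)         # negative log likelihood, as in BERTLstmCRF.forward
    best_paths = crf.decode(emissions)   # list of 2 best tag-id sequences, each of length 5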
models/layers/__init__.py ADDED
@@ -0,0 +1 @@
+ from .CRF import CRF
models/layers/__pycache__/CRF.cpython-310.pyc ADDED
Binary file (9.37 kB).
models/layers/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (175 Bytes).