File size: 4,567 Bytes
ec1a337
 
 
 
f652b33
 
ec1a337
30933bd
ec1a337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30933bd
ec1a337
 
 
 
 
 
 
 
 
 
 
 
 
 
30933bd
ec1a337
 
 
 
 
 
 
30933bd
ec1a337
 
 
 
30933bd
ec1a337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30933bd
 
ec1a337
30933bd
ec1a337
 
 
30933bd
ec1a337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30933bd
ec1a337
 
 
 
 
 
 
 
 
 
 
 
e228ec7
ec1a337
 
 
30933bd
ec1a337
 
 
30933bd
ec1a337
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import requests
import streamlit as st
import torch
# from transformers import AutoTokenizer, AutoModel
# from sentence_transformers import util
class SentenceSimiliarity():
    """Score how similar two sentences are via the Hugging Face Inference API.

    The request goes to the hosted endpoint for ``model_name``. The bearer
    token is read from the ``HF_KEY`` environment variable.
    """

    # Seconds to wait for the API before giving up, so a stalled request
    # cannot hang the app indefinitely.
    REQUEST_TIMEOUT = 30

    API_ROOT = "https://api-inference.huggingface.co/models"

    AVAILABLE_MODELS = [
        # "distilbert-base-uncased",
        # "bert-base-uncased",
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/all-mpnet-base-v2",
        "sentence-transformers/distiluse-base-multilingual-cased-v2",
        "intfloat/e5-small",
        "intfloat/e5-base",
        "intfloat/e5-large-v2",
        "intfloat/multilingual-e5-base",
        # "togethercomputer/m2-bert-80M-32k-retrieval",
        # "togethercomputer/m2-bert-80M-8k-retrieval",
        # "togethercomputer/m2-bert-80M-2k-retrieval",
    ]

    def __init__(self, model_name, sentence1, sentence2):
        """Store the sentences and prepare the request URL and headers.

        Args:
            model_name: Hub id of the model to query, e.g.
                ``"sentence-transformers/all-MiniLM-L6-v2"``.
            sentence1: Source sentence the other one is compared against.
            sentence2: Sentence compared with ``sentence1``.
        """
        self.KEY = os.getenv("HF_KEY")
        # Only send an Authorization header when a token is configured;
        # otherwise the literal string "Bearer None" would be sent.
        self.headers = {"Authorization": f"Bearer {self.KEY}"} if self.KEY else {}
        self.sentence1 = sentence1
        self.sentence2 = sentence2
        self.model_name = model_name
        self.api_url = f"{self.API_ROOT}/{model_name}"

    def model_selection(self):
        """Let the user pick a model from a sidebar select box.

        Updates ``self.model_name`` and keeps ``self.api_url`` in sync so the
        next query is sent to the selected model.
        """
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=self.AVAILABLE_MODELS,
        )
        self.api_url = f"{self.API_ROOT}/{self.model_name}"

    def query(self, payload):
        """POST ``payload`` to the model endpoint and return the decoded JSON.

        Args:
            payload: JSON-serialisable request body.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.RequestException: On connection failures or timeouts.
            ValueError: If the response body is not valid JSON.
        """
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.json()

    def results(self):
        """Return a human-readable similarity statement for the two sentences.

        Returns:
            A sentence giving the similarity as a percentage, or a message
            describing the failure when the request fails or the response
            does not contain a score.
        """
        payload = {
            "inputs": {
                "source_sentence": self.sentence1,
                "sentences": [
                    self.sentence2,
                ]
            },
        }
        try:
            scores = self.query(payload)
        except (requests.RequestException, ValueError) as exc:
            return f"Could not reach the similarity service: {exc}"

        # A JSON object instead of a list of scores means the API reported a
        # failure (its message, if any, is under the "error" key).
        if isinstance(scores, dict):
            return f"Could not compute similarity: {scores.get('error', scores)}"

        has_score = (
            isinstance(scores, list)
            and bool(scores)
            and isinstance(scores[0], (int, float))
        )
        if not has_score:
            return f"Unexpected response from the similarity service: {scores!r}"

        return f"The sentence has {scores[0] * 100:.2f}% similarity"
    

class UI():
    """Streamlit front end for comparing two pieces of text."""

    def __init__(self):
        """Render the page title and caption."""
        st.title("Sentence Similiarity Checker")
        st.caption("You can use this for checking similarity between resume and job description")

    def get(self):
        """Render the input widgets and store their current values on ``self``.

        Sets ``model_name`` (sidebar select box), ``sentence1`` and
        ``sentence2`` (text areas) and ``button`` (the "Check" button state).
        """
        available_models = [
            # "distilbert-base-uncased",
            # "bert-base-uncased",
            "sentence-transformers/all-MiniLM-L6-v2",
            "sentence-transformers/all-mpnet-base-v2",
            "sentence-transformers/distiluse-base-multilingual-cased-v2",
            "intfloat/e5-small",
            "intfloat/e5-base",
            "intfloat/e5-large-v2",
            "intfloat/multilingual-e5-base",
            # "togethercomputer/m2-bert-80M-32k-retrieval",
            # "togethercomputer/m2-bert-80M-8k-retrieval",
            # "togethercomputer/m2-bert-80M-2k-retrieval",
        ]
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=available_models,
        )

        self.sentence1 = st.text_area(
            label="Sentence 1",
            help="This is a parent text the next text will be compared with this text"
        )
        self.sentence2 = st.text_area(
            label="Sentence 2",
            help="This is a child text"
        )
        self.button = st.button(
            label="Check",
            help='Check Sentence Similarity'
        )

    def result(self):
        """Render the inputs and, when "Check" is pressed, show the score.

        Blank input is rejected with a warning instead of being sent to the
        similarity service.
        """
        self.get()

        # Nothing to compute until the user presses the button.
        if not self.button:
            return

        if not self.sentence1.strip() or not self.sentence2.strip():
            st.warning("Please enter text in both Sentence 1 and Sentence 2.")
            return

        ss = SentenceSimiliarity(self.model_name, self.sentence1, self.sentence2)
        st.text(ss.results())

# Guarded so that importing this module (e.g. from a test) does not render
# the page; running the file as a script still builds and renders the UI.
if __name__ == "__main__":
    ui = UI()
    ui.result()