File size: 2,251 Bytes
27c69e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import time

import gradio as gr
import numpy as np
import torch
# Load model directly
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Prefer the first CUDA GPU when one is available; otherwise fall back to CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)


def get_model(model_name="TURKCELL/gibberish-detection-model-tr"):
    """Load the gibberish-detection classifier and its tokenizer.

    Args:
        model_name: Checkpoint identifier handed to both ``from_pretrained``
            calls, so the model and tokenizer always come from the same
            checkpoint. Defaults to the Turkish gibberish-detection model.

    Returns:
        A ``(tokenizer, model)`` tuple. The model has already been moved to
        the module-level ``device``.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=True,
                                              use_fast=True)
    model.to(device)
    print(f'bert model loading time {time.perf_counter() - start_time}')
    return tokenizer, model


# Load the tokenizer and model once, when the module is first executed; the
# request handlers below read these module-level names on every call.
tokenizer, model = get_model()


def get_result_for_one_sample(model, tokenizer, device, sample):
    """Classify a single piece of text as ``'gibberish'`` or ``'real'``.

    Args:
        model: Callable classifier. It is invoked with the encoded batch
            unpacked as keyword arguments and must return an object that
            exposes a ``logits`` tensor.
        tokenizer: Callable that encodes a list of strings into a batch
            object supporting ``.to(device)``.
        device: Device the encoded batch is moved to before the forward pass.
        sample: The text to classify.

    Returns:
        The label of the highest-scoring class index: ``'real'`` for 0 and
        ``'gibberish'`` for 1.

    Raises:
        KeyError: If the highest-scoring class index is neither 0 nor 1.
    """
    labels = {
        1: 'gibberish',
        0: 'real'
    }
    encoded = tokenizer([sample], padding=True, truncation=True, max_length=256, return_tensors='pt').to(device)
    # Inference only: disable autograd so no computation graph is built and
    # kept alive for a forward pass whose gradients are never used.
    with torch.no_grad():
        output = model(**encoded)
    predicted = np.argmax(output.logits.detach().to('cpu').numpy(), axis=1)
    # Convert the NumPy integer to a plain int before the dictionary lookup.
    return labels[int(predicted[0])]


def process_sentence_with_bert(sentence):
    """Classify *sentence* with the module-level model and print the timing.

    Delegates to ``get_result_for_one_sample`` using the module-level
    ``model``, ``tokenizer`` and ``device``, and returns its result unchanged.
    """
    print('processing text with bert')
    started_at = time.time()
    label = get_result_for_one_sample(model, tokenizer, device, sentence)
    elapsed = time.time() - started_at
    print(f'bert processing time {elapsed}')
    return label


def classify_gibberish(sentence, ignore_words_file):
    """Return the gibberish/real classification for *sentence*.

    ``ignore_words_file`` is accepted but not read by this function.
    """
    # TODO: ignore_words_file still needs to be processed; it is currently
    # ignored and only the sentence is classified.
    return process_sentence_with_bert(sentence)


# Input widgets, in the order their values are passed to classify_gibberish.
sentence_box = gr.Textbox(lines=2, placeholder="Enter Sentence Here...")
ignore_words_upload = gr.File(label="Upload Ignore Words File")
# Output widget that shows the returned label.
result_box = gr.Textbox(label="Gibberish Detection Result")

iface = gr.Interface(
    fn=classify_gibberish,
    inputs=[sentence_box, ignore_words_upload],
    outputs=result_box,
    title="Simple Gibberish Text Detection For Turkish",
    description="""Simple gibberish text detection given text like 
                                    adsfdnsfnıunf 
                                    sasdlsöefls.""",
)
# Start the web UI as soon as the script runs.
iface.launch()