import tensorflow as tf
import numpy as np
import gradio as gr
from scipy.special import softmax
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Tokenizer from the base RoBERTa checkpoint; classification head from the fine-tuned model.
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
model = TFAutoModelForSequenceClassification.from_pretrained("ilan541/OncUponTim")


def split_text(text, nb_splits):
    """Split the text into roughly equal character chunks (about nb_splits of them)."""
    l = len(text)
    chars = max(1, l // nb_splits)  # guard against a zero step for very short inputs
    out = []
    for i in range(0, l, chars):
        out.append(text[i:i + chars])
    return out


def get_probs(list_of_portions):
    """Run the model on each chunk and return the mean logit for each of the two classes."""
    y_pred_logits_0 = []
    y_pred_logits_1 = []
    for text in list_of_portions:
        inp = tokenizer(text, truncation=True, padding='max_length',
                        max_length=512, return_tensors='tf')
        y_pred = model(inp)
        y_pred_logits_0.append(y_pred.logits[:, 0])
        y_pred_logits_1.append(y_pred.logits[:, 1])
    return np.mean(y_pred_logits_0), np.mean(y_pred_logits_1)


def predict(your_text):
    # Split the text into three chunks; each chunk is tokenized separately
    # (truncated at 512 tokens).
    nb_splits = 3
    splits = split_text(your_text, nb_splits)

    # Average the per-chunk logits, then convert the pair to probabilities.
    y_logits_0, y_logits_1 = get_probs(splits)
    print('y_logits_0:', y_logits_0)
    print('y_logits_1:', y_logits_1)
    y_probs_1 = softmax([y_logits_0, y_logits_1])[1]
    print('y_probs_1:', y_probs_1)

    # Classify as "popular" when the probability of class 1 clears the threshold.
    threshold_value = 0.35
    y_pred_1 = (y_probs_1 >= threshold_value)
    print('y_pred_1:', y_pred_1)

    if not y_pred_1:
        return 'This content is not of high standard. It needs editing.'
    else:
        return 'Promising content! Our algorithm predicts it will be very popular.'


iface = gr.Interface(fn=predict, inputs="text", outputs="text")
iface.launch()