# Spaces: Runtime error
# external | |
import pytest | |
# project | |
import textdistance | |
import csv | |
import pyarabic.araby as araby | |
import numpy as np | |
from statistics import mean | |
import pickle | |
import os | |
import gradio as gr | |
def textdistance_lcsseq(A,B):
    """Mark the longest common subsequence of A inside B and score the match.

    B is copied character by character; every run of characters that belongs
    to ``textdistance.lcsseq(A, B)`` is wrapped in parentheses.

    Scoring:
      * each matched character adds the current streak length (1, 2, 3, ...),
        so consecutive matches are worth more than scattered ones;
      * each mismatch step, and each character of B left over once the common
        subsequence is exhausted, subtracts 0.3;
      * every pair of equal space-separated words (one from A, one from B)
        adds 10.

    Args:
        A: the query string.
        B: the candidate string to annotate.

    Returns:
        A ``(annotated_B, grade)`` tuple.
    """
    common = textdistance.lcsseq(A, B)
    pieces = []
    pos_b = 0
    pos_c = 0
    in_match = False
    grade = 0
    streak = 1

    while pos_b < len(B) and pos_c < len(common):
        if B[pos_b] == common[pos_c]:
            # Open a parenthesised run on the first matched character.
            if not in_match:
                in_match = True
                pieces.append("(")
            pieces.append(B[pos_b])
            grade += streak
            streak += 1
            pos_b += 1
            pos_c += 1
            continue

        # Mismatch: the streak is broken and a small penalty applies.
        streak = 1
        grade -= 0.3
        if in_match:
            pieces.append(")")
            in_match = False
        elif common[pos_c] == " ":
            # The next common character is a space: copy B through to that
            # space in one step (a single penalty) and consume it from the
            # common subsequence without counting it as a match.
            while B[pos_b] != common[pos_c]:
                pieces.append(B[pos_b])
                pos_b += 1
            pos_c += 1
        pieces.append(B[pos_b])
        pos_b += 1

    if in_match:
        pieces.append(")")

    # Whatever is left of B lies past the common subsequence: one penalty per
    # character, applied one at a time.
    for _ in range(len(B) - pos_b):
        grade -= 0.3
    pieces.append(B[pos_b:])

    # Whole-word bonus, counted once per equal (word of A, word of B) pair.
    words_b = B.split(" ")
    for word_a in A.split(" "):
        for word_b in words_b:
            if word_a == word_b:
                grade += 10

    return "".join(pieces), grade
def load():
    """Return the list of diacritic-stripped texts, caching it in ``pickle.pkl``.

    When a non-empty ``pickle.pkl`` exists in the working directory it is
    unpickled and returned. Otherwise the third column of every row of
    ``quran.csv`` (UTF-8) is passed through ``araby.strip_diacritics``, the
    resulting list is written to ``pickle.pkl`` and then returned.

    The cache is written only after the CSV has been read completely, so a
    missing or malformed CSV never leaves a truncated cache behind.

    Returns:
        list: one processed string per CSV row, in file order.

    Raises:
        FileNotFoundError: if neither a usable cache nor ``quran.csv`` exists.
        IndexError: if a CSV row has fewer than three columns.
    """
    filename = "pickle.pkl"

    # A zero-byte file can never be a valid pickle (e.g. left behind by an
    # interrupted earlier build), so treat it as "no cache" and rebuild.
    if os.path.exists(filename) and os.path.getsize(filename) > 0:
        # NOTE(security): unpickling runs arbitrary code; this file must only
        # ever be produced by this function, never supplied by a user.
        with open(filename, 'rb') as picklefile:
            return pickle.load(picklefile)

    # newline="" lets the csv module handle line endings inside quoted fields.
    with open('quran.csv', encoding="utf-8", newline="") as csv_file:
        quran = [
            araby.strip_diacritics(row[2])
            for row in csv.reader(csv_file, delimiter=',')
        ]

    with open(filename, 'wb') as picklefile:
        pickle.dump(quran, picklefile)
    return quran
def search(query,numberOfResults):
    """Return the best-scoring corpus entries for ``query`` as display text.

    Every entry returned by ``load()`` is scored with
    ``textdistance_lcsseq(query, entry)``. Entries are ranked by grade,
    highest first, with ties keeping corpus order. Of the top
    ``numberOfResults`` entries, only those whose grade is strictly greater
    than the mean grade of the top ``3 * numberOfResults`` entries are kept.

    Args:
        query: the text to search for.
        numberOfResults: maximum number of lines to return; coerced to int.

    Returns:
        str: one ``"<grade> : <annotated entry>"`` line per hit, each ending
        with a newline; the empty string when nothing qualifies, the corpus is
        empty, or ``numberOfResults`` is not positive.
    """
    quran = load()

    # Slicing and range() reject floats; a UI slider may deliver e.g. 10.0
    # (TODO confirm against the Gradio version in use).
    limit = int(numberOfResults)
    if not quran or limit <= 0:
        # mean() of an empty sequence raises StatisticsError.
        return ""

    marked = []
    scores = []
    for entry in quran:
        text, score = textdistance_lcsseq(query, entry)
        marked.append(text)
        scores.append(score)

    # Stable sort of the indices: equal grades stay in corpus order.
    order = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    meanOfHead = mean([scores[k] for k in order[:limit * 3]])

    return "".join(
        "%d : %s" % (scores[k], marked[k]) + "\n"
        for k in order[:limit]
        if scores[k] > meanOfHead
    )
# Web UI: a free-text query box plus a slider (1 to 100, step 1, default 10)
# whose value is passed to search() as numberOfResults; the result is shown
# as plain text.
result_count = gr.Slider(1, 100, value=10, step=1)
demo = gr.Interface(fn=search, inputs=["text", result_count], outputs=["text"])
demo.launch()