# Page-header residue from the hosting site's file viewer, kept as a comment:
# Spaces status "Runtime error"; file size 2,672 bytes; revision b4e3c5c.
# (The viewer's line-number gutter, 1-108, carried no content.)
# external
import pytest
# project
import textdistance
import csv
import pyarabic.araby as araby
import numpy as np
from statistics import mean
import pickle
import os
import gradio as gr
def textdistance_lcsseq(A,B):
    """Highlight the part of ``B`` shared with ``A`` and score the match.

    ``textdistance.lcsseq(A, B)`` supplies the common subsequence ``C``
    (presumably the longest common subsequence, returned as a string --
    confirm against the textdistance documentation).  ``B`` is then walked
    left to right: characters that line up with ``C`` are grouped into
    runs wrapped in parentheses, every other character is copied as is.

    Scoring:
      * the k-th consecutive matched character of a run adds ``k``;
      * every loop step that lands on a non-matching character resets the
        run length and subtracts 0.3;
      * every character of ``B`` left over once ``C`` is used up
        subtracts 0.3;
      * every (word of ``A``, word of ``B``) pair of equal words, splitting
        on a single space, adds 10.

    Args:
        A: the query string.
        B: the candidate string to mark up and score.

    Returns:
        A tuple ``(marked, grade)``: ``B`` with the matched runs in
        parentheses, and the numeric score.
    """
    C = textdistance.lcsseq(A, B)
    pieces = []  # joined once at the end instead of repeated string +=
    i = 0  # cursor in B
    j = 0  # cursor in C
    inside = False  # True while a "(" is open
    grade = 0
    seq = 1  # value of the next consecutive match
    while i < len(B) and j < len(C):
        if B[i] == C[j]:
            if not inside:
                inside = True
                pieces.append("(")
            pieces.append(B[i])
            grade += seq
            seq += 1
            i += 1
            j += 1
            continue

        # Mismatch: the run (if any) ends here.
        seq = 1
        grade -= 0.3
        if inside:
            pieces.append(")")
            inside = False
        elif C[j] == " ":
            # Outside a run with a space pending in C: copy B up to its
            # next space and consume that space from C without
            # highlighting or scoring it.
            next_space = B.find(" ", i)
            if next_space == -1:
                # C is not a subsequence of the rest of B.  The unbounded
                # scan used to raise IndexError here; stop matching and
                # let the tail handling below copy what is left.
                break
            pieces.append(B[i:next_space])
            i = next_space
            j += 1
        pieces.append(B[i])
        i += 1

    if inside:
        pieces.append(")")

    # Unmatched tail of B: penalised per character, copied verbatim.
    # Subtract step by step so the float result matches the old loop.
    for _ in range(i, len(B)):
        grade -= 0.3
    pieces.append(B[i:])

    # Whole-word bonus; B is split once rather than once per word of A.
    words_b = B.split(" ")
    for word_a in A.split(" "):
        for word_b in words_b:
            if word_a == word_b:
                grade += 10
    return "".join(pieces), grade
def load():
    """Return the list of texts to search, using ``pickle.pkl`` as a cache.

    If ``pickle.pkl`` exists in the working directory it is unpickled and
    returned.  Otherwise the third column of every row of ``quran.csv``
    is passed through ``araby.strip_diacritics``, the resulting list is
    written to ``pickle.pkl`` and returned.

    Returns:
        list: one processed string per CSV row.

    Raises:
        OSError: if the cache is absent and ``quran.csv`` cannot be opened.
        IndexError: if a CSV row has fewer than three columns.
    """
    filename = "pickle.pkl"
    if os.path.exists(filename):
        try:
            # NOTE(security): pickle.load executes arbitrary code from the
            # file; this is only acceptable because the cache is written
            # by this same function.  Never point it at untrusted data.
            with open(filename, 'rb') as picklefile:
                return pickle.load(picklefile)
        except (EOFError, pickle.UnpicklingError):
            # An empty or truncated cache (e.g. left by an interrupted
            # earlier run) is rebuilt from the CSV below.
            pass

    # Read the CSV *before* creating the cache file, so a missing or
    # malformed CSV can no longer leave an empty pickle.pkl behind.
    # newline="" is what the csv module asks for when opening files.
    with open('quran.csv', encoding="utf-8", newline="") as csv_file:
        quran = [
            araby.strip_diacritics(row[2])
            for row in csv.reader(csv_file, delimiter=',')
        ]
    with open(filename, 'wb') as picklefile:
        pickle.dump(quran, picklefile)
    return quran
def search(query,numberOfResults):
    """Rank every text returned by ``load()`` against ``query``.

    Each text is scored with ``textdistance_lcsseq(query, text)`` and the
    texts are ordered by score, highest first (ties keep corpus order).
    Of the top ``numberOfResults`` texts, only those scoring strictly
    above the mean score of the top ``3 * numberOfResults`` are reported,
    so a result set whose scores are all equal yields an empty string.

    Args:
        query: the text to look for.
        numberOfResults: maximum number of lines to return.  It is
            converted with ``int()`` because it is used as a slice bound
            and a UI slider may deliver it as a float (assumption about
            the caller -- confirm against the Gradio version in use).

    Returns:
        str: one ``"<score> : <marked text>"`` line per reported text,
        each terminated by a newline; ``""`` when there is nothing to
        report.
    """
    limit = int(numberOfResults)
    quran = load()
    if limit <= 0 or not quran:
        # mean() of an empty head would raise StatisticsError.
        return ""

    # (marked_text, grade) pairs, sorted once; sorted() is stable, so equal
    # grades stay in corpus order exactly as with the index-based sort.
    scored = [textdistance_lcsseq(query, verse) for verse in quran]
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    meanOfHead = mean([grade for _, grade in ranked[:limit * 3]])
    lines = [
        "%d : %s"%(grade, marked) + "\n"
        for marked, grade in ranked[:limit]
        if grade > meanOfHead
    ]
    return "".join(lines)
# Build the web UI (a text box for the query, a 1-100 slider for the number
# of results, a text output) and start serving it.
gr.Interface(fn=search, inputs=["text",gr.Slider(1, 100, value=10, step=1)], outputs=["text"]).launch()