# lcseq / app.py
# muhalmutaz
# main
# b4e3c5c
# external
import pytest
# standard-library and third-party dependencies
import textdistance
import csv
import pyarabic.araby as araby
import numpy as np
from statistics import mean
import pickle
import os
import gradio as gr
def textdistance_lcsseq(A,B):
    """Annotate B with the characters it shares with A and score the match.

    ``textdistance.lcsseq(A, B)`` supplies a common subsequence ``C``
    (presumably the longest common subsequence, returned as a string --
    confirm against the textdistance documentation). B is then walked
    left to right in lockstep with C: every character of B is copied to
    the output in order, and runs of consecutive characters that line up
    with C are wrapped in parentheses.

    Scoring:
      * each matched character adds ``seq``, which starts at 1 and grows
        by 1 for every further consecutive match, so long unbroken runs
        are rewarded more than scattered matches;
      * each main-loop step where B[i] differs from C[j] subtracts 0.3
        and resets ``seq`` to 1;
      * each character of B left over after the main loop subtracts 0.3;
      * every pair (wordA, wordB) of equal space-separated tokens from A
        and B adds 10.

    Args:
        A: the query text.
        B: the candidate text to annotate and score.

    Returns:
        A tuple ``(str_return, grade)``: B with "(" / ")" inserted around
        matched runs, and the numeric score (may be a float because of
        the 0.3 penalties).
    """
    C = textdistance.lcsseq(A,B)
    str_return = ""
    # i indexes B, j indexes C.
    i=0
    j=0
    # True while a "(" has been written and its ")" has not.
    inside=False
    grade = 0
    # Bonus awarded for the next matched character.
    seq = 1
    while i<len(B) and j<len(C):
        if B[i] == C[j]:
            if inside:
                str_return += B[i]
            else:
                # First character of a new matched run: open the group.
                inside = True
                str_return += "("
                str_return += B[i]
            grade += seq
            seq += 1
            i+=1
            j+=1
        else:
            seq = 1
            grade -= 0.3
            if inside:
                # The run just ended: close the group, then copy the
                # unmatched character.
                str_return += ")"
                inside = False
                str_return += B[i]
                i+=1
            else:
                if C[j]==" ":
                    # The next subsequence character is a space: copy B
                    # up to its next space and consume that space from C,
                    # so a space never opens a highlighted run. The
                    # characters skipped here get no individual penalty.
                    while not B[i] == C[j]:
                        str_return += B[i]
                        i+=1
                    j+=1
                str_return += B[i]
                i+=1
    # Close a group that is still open when the main loop stops.
    if inside:
        str_return += ")"
    # Copy whatever is left of B, penalising each leftover character.
    while i<len(B):
        grade -= 0.3
        str_return += B[i]
        i+=1
    # Whole-word bonus: compares every token of A with every token of B.
    for wordA in A.split(" "):
        for wordB in B.split(" "):
            if wordA == wordB:
                grade+=10
    return str_return,grade
def load():
    """Return the list of texts to search, using a pickle cache when present.

    If ``pickle.pkl`` exists in the working directory it is unpickled and
    returned. Otherwise the third column of every row of ``quran.csv``
    (UTF-8) is passed through ``araby.strip_diacritics``, the resulting
    list is cached to ``pickle.pkl`` and returned.

    The CSV is read *before* the cache file is created, and the cache is
    written to a temporary file that is renamed into place. A missing CSV
    or an interrupted write therefore never leaves an empty or truncated
    ``pickle.pkl`` behind that would break every later call.

    Returns:
        list: one entry per CSV row.

    Raises:
        FileNotFoundError: if neither ``pickle.pkl`` nor ``quran.csv`` exists.
    """
    filename = "pickle.pkl"

    if os.path.exists(filename):
        # NOTE(security): pickle.load can execute arbitrary code from a
        # tampered file; this is only safe while pickle.pkl is a cache
        # written by this function itself.
        with open(filename, 'rb') as picklefile:
            return pickle.load(picklefile)

    with open('quran.csv', encoding="utf-8") as csv_file:
        quran = [
            araby.strip_diacritics(row[2])
            for row in csv.reader(csv_file, delimiter=',')
        ]

    # Write to a sibling temp file, then rename, so the cache appears
    # only once it is complete.
    tmp_name = filename + ".tmp"
    with open(tmp_name, 'wb') as picklefile:
        pickle.dump(quran, picklefile)
    os.replace(tmp_name, filename)
    return quran
def search(query,numberOfResults):
    """Return the best-scoring texts for ``query`` as a printable report.

    Every text returned by ``load()`` is scored against ``query`` with
    ``textdistance_lcsseq``. The texts are ranked by score, highest first;
    texts with equal scores keep their original order. Of the first
    ``numberOfResults`` ranked texts, only those scoring strictly above
    the mean of the top ``3 * numberOfResults`` scores are reported.

    Args:
        query: the text to look for.
        numberOfResults: maximum number of lines to return. It is
            truncated to an int so a float from the UI slider is accepted.

    Returns:
        str: one ``"<score> : <annotated text>\\n"`` line per reported
        text (the score is formatted with ``%d``), or ``""`` when there is
        nothing to search or nothing was requested.
    """
    quran = load()
    limit = int(numberOfResults)
    if not quran or limit <= 0:
        # statistics.mean raises StatisticsError on empty data; with no
        # texts or no requested results there is nothing to report.
        return ""

    annotated = []
    scores = []
    for verse in quran:
        marked, score = textdistance_lcsseq(query, verse)
        annotated.append(marked)
        scores.append(score)

    # One stable descending sort gives both the ranking and the head
    # scores used for the threshold.
    ranking = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    meanOfHead = mean(scores[k] for k in ranking[:limit * 3])

    return "".join(
        "%d : %s" % (scores[k], annotated[k]) + "\n"
        for k in ranking[:limit]
        if scores[k] > meanOfHead
    )
# Web UI: a free-text query box and a slider for the number of results
# (built as gr.Slider(1, 100, value=10, step=1)); the string returned by
# search() is shown in a text output. The app is launched as soon as this
# module is executed.
result_count_slider = gr.Slider(1, 100, value=10, step=1)
demo = gr.Interface(
    fn=search,
    inputs=["text", result_count_slider],
    outputs=["text"],
)
demo.launch()