File size: 1,789 Bytes
4440f5f
 
 
f804a6f
4440f5f
ceb6ff1
4440f5f
 
 
 
bd604ec
 
4440f5f
 
6d42934
7f0e044
1379df3
 
 
6d42934
 
 
 
f804a6f
6d42934
4440f5f
e8d071a
 
18f406c
f804a6f
e8d071a
3ff0003
bee71b9
 
4440f5f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from ragatouille import RAGPretrainedModel
import os 
import gradio as gr
from unicodedata import normalize 

# Directory of the index that is loaded with RAGPretrainedModel.from_index.
path_to_index = 'colbert/indexes/ArColbertQuran'
message = "waiting to load index ..."
# Bind RAG unconditionally so later code can test it for None instead of
# failing with a NameError when the index directory is absent.
RAG = None
if os.path.exists(path_to_index):
    RAG = RAGPretrainedModel.from_index(path_to_index)
    message = "index loaded!"
else:
    # Make the failure visible at startup rather than on the first query.
    message = f"index not found at '{path_to_index}'; search is unavailable"
print(message)
    
import gradio as gr

def process_results(results):
    """Render retrieved passages as a single plain-text string.

    Each item of ``results`` is indexed with the keys ``document_id``,
    ``document_metadata`` and ``content``; one formatted entry is produced
    per item, in order, and every entry ends with a blank line.

    Returns an empty string when ``results`` is empty.
    """
    return "".join(
        f"Sura: {hit['document_id']} ({hit['document_metadata']}) \n Text:{hit['content']}\n\n"
        for hit in results
    )

# Number of top-ranked documents requested from the retriever for each query.
k = 3

def answer_fn(query):
    """Search the index for ``query`` and return the hits as formatted text.

    The query is NFKC-normalized before being passed to the retriever's
    ``search`` method, which is asked for the top ``k`` results (``k`` is the
    module-level setting). The results are rendered by ``process_results``.

    Raises:
        RuntimeError: if the module-level ``RAG`` retriever is not available,
            which happens when the index directory was not found at startup.
    """
    # RAG is only created at import time when the index directory exists, so
    # look it up defensively and report a clear error instead of a NameError.
    retriever = globals().get("RAG")
    if retriever is None:
        raise RuntimeError(
            "The retrieval index is not loaded, so queries cannot be answered."
        )
    results = retriever.search(query=normalize('NFKC', query), k=k)
    return process_results(results)

# Single-textbox Gradio UI: the user's query goes to answer_fn and the
# formatted retrieval results are shown in the output textbox.
qapp = gr.Interface(
    fn=answer_fn,
    inputs="textbox",
    outputs="textbox",
    title="Qur'an Retrieval Demo - Semantic Search",
    description=(
        "A basic demo based on Arabic ColBERT (250k queries, normalized) "
        "and simple text of the Qur'an (also normalized). "
        "First query may take a minute, then much faster."
        # The four leading spaces below keep the description text identical
        # to the original backslash-continued literal.
        "    Try to include relevant terms - this is just retrieval, "
        "not LLM chat and Qur'an is an edge case."
        "    For details, see: https://www.linkedin.com/posts/akhooli_arabic-1-million-curated-triplets-dataset-activity-7222951839774699521-PZcw"
    ),
    # Sample queries offered as clickable examples in the UI.
    examples=[
        "ما أهمية كتابة المعاملات؟",
        "أخبرني عن عذاب الله للمنافقين",
        "حسن معاملة الوالدين",
        "ما معجزات سيدنا عيسى",
        "ما هو التطفيف",
        "ما قصة المؤمنين الذين قتلوا في الحفرة؟",
        "ما آداب اﻻستئذان؟",
        "النبي الذي تربى في بيت حاكم مصر",
    ],
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    qapp.launch()