File size: 4,398 Bytes
7473ba2
e29da5d
 
7473ba2
02ffc6e
7473ba2
 
02ffc6e
 
 
 
 
7473ba2
 
02ffc6e
 
7473ba2
02ffc6e
7473ba2
9911f95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import torch
import pandas as pd
import gradio as gr
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util, models


# Question encoder: a transformer loaded from the local checkpoint directory,
# followed by a pooling layer configured with pooling_mode='cls'.
_q_transformer = models.Transformer(
    model_name_or_path="checkpoints/q_encoder",
    max_seq_length=512,
)
_q_pooling = models.Pooling(
    word_embedding_dimension=768,
    pooling_mode='cls',
)
q_encoder = SentenceTransformer(modules=[_q_transformer, _q_pooling])

# Document embeddings are read from disk and mapped onto the CPU device.
_cpu_device = torch.device('cpu')
doc_embeddings = torch.load('checkpoints/doc_embeddings.pt', map_location=_cpu_device)

# The 'train' split of the articles CSV, held as a DataFrame so that `search`
# can look up article text and metadata by row label.
_articles = load_dataset("antoiloui/bsard", data_files="articles_fr.csv")
docs = pd.DataFrame(_articles['train'])

def search(question):
    """Return the five articles most similar to *question*, best match first.

    The question is embedded with ``q_encoder`` and compared against
    ``doc_embeddings`` using cosine similarity.

    Args:
        question: The query text to encode.

    Returns:
        A list of strings in descending similarity order. Each string is the
        article text, a blank line, then ``- Art. <article_no>, <code>``.
    """
    # Must stay in sync with the number of output textboxes of the interface.
    n_results = 5

    q_emb = q_encoder.encode(question, convert_to_tensor=True)
    # Only the hits that are actually displayed are requested; the [0] selects
    # the result list of the single query.
    hits = util.semantic_search(
        q_emb,
        doc_embeddings,
        top_k=n_results,
        score_function=util.cos_sim,
    )[0]

    # A list (not a set) keeps the ranking order and never drops duplicates,
    # so every output box receives the result of the matching rank.
    results = []
    for hit in hits[:n_results]:
        doc_id = hit['corpus_id']
        text = docs.loc[doc_id, 'article']
        article_no = docs.loc[doc_id, 'article_no']
        code = docs.loc[doc_id, 'code']
        results.append(text + '\n\n' + f"- Art. {article_no}, {code}")
    return results

# Static text passed to `gr.Interface` below as its `title` and `description`.
title = "Belgian Legislation Search"
description = "A biencoder model was trained to retrieve relevant statutory articles to legal issues. Ask it a question in French!"
# Markdown passed as the interface's `article` argument: a table giving the
# number of articles per code, grouped by authority, with the corpus total.
article = """
The model will return the most semantically relevant laws from a corpus of 22,633 statutory articles collected from 32 Belgian codes:

| Authority | Code                                                | #Articles |
|-----------|-----------------------------------------------------|-----------|
| Federal   | Judicial Code                                       | 2285      |
|           | Code of Economic Law                                | 2032      |
|           | Civil Code                                          | 1961      |
|           | Code of Workplace Welfare                           | 1287      |
|           | Code of Companies and Associations                  | 1194      |
|           | Code of Local Democracy and Decentralization        | 1159      |
|           | Navigation Code                                     | 977       |
|           | Code of Criminal Instruction                        | 719       |
|           | Penal Code                                          | 689       |
|           | Social Penal Code                                   | 307       |
|           | Forestry Code                                       | 261       |
|           | Railway Code                                        | 260       |
|           | Electoral Code                                      | 218       |
|           | The Constitution                                    | 208       |
|           | Code of Various Rights and Taxes                    | 191       |
|           | Code of Private International Law                   | 135       |
|           | Consular Code                                       | 100       |
|           | Rural Code                                          | 87        |
|           | Military Penal Code                                 | 66        |
|           | Code of Belgian Nationality                         | 31        |
| Regional  | Walloon Code of Social Action and Health            | 3650      |
|           | Walloon Code of the Environment                     | 1270      |
|           | Walloon Code of Territorial Development             | 796       |
|           | Walloon Public Service Code                         | 597       |
|           | Walloon Code of Agriculture                         | 461       |
|           | Brussels Spatial Planning Code                      | 401       |
|           | Walloon Code of Basic and Secondary Education       | 310       |
|           | Walloon Code of Sustainable Housing                 | 286       |
|           | Brussels Housing Code                               | 279       |
|           | Brussels Code of Air, Climate and Energy Management | 208       |
|           | Walloon Animal Welfare Code                         | 108       |
|           | Brussels Municipal Electoral Code                   | 100       |
| Total     |                                                     | 22633     |

"""
# Sample French questions passed as the interface's `examples` argument.
examples = [
    "Qu'est-ce que je risque si je viole le secret professionnel ?",
    "Mon employeur peut-il me licencier alors que je suis malade ?",
    "Mon voisin fait beaucoup de bruit, que faire ?",
]
# Build the web UI: one text input wired to `search`, five textbox outputs
# for the results it produces, flagging turned off. Then start serving.
demo = gr.Interface(
    fn=search,
    inputs=['text'],
    outputs=['textbox'] * 5,
    allow_flagging="never",
    title=title,
    description=description,
    article=article,
    examples=examples,
)
demo.launch()