NorHsangPha
committed on
Commit
•
949c5da
1
Parent(s):
5aa6e12
Initial: initial commit
Browse files- app.py +17 -0
- lid.py +52 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from lid import identify_and_plot, LID_EXAMPLES
|
3 |
+
|
4 |
+
# Widgets used by the interface: one two-line text box for the input, and a
# text box plus a plot for the two values returned by identify_and_plot.
_text_input = gr.Textbox(lines=2, label="Input text")
_result_views = [
    gr.Textbox(label="Language"),
    gr.Plot(label="Confidence Plot"),
]

# Single-function interface around identify_and_plot, with the example
# inputs from lid.py and flagging set to "never".
lid_indentify = gr.Interface(
    fn=identify_and_plot,
    inputs=_text_input,
    outputs=_result_views,
    examples=LID_EXAMPLES,
    title="Language Identification Demo",
    description="Identify the language of input text and view confidence levels.",
    allow_flagging="never",
)

# Render the interface inside a Blocks container and start the app.
demo = gr.Blocks()
with demo:
    lid_indentify.render()

demo.launch()
|
lid.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fasttext
|
2 |
+
from huggingface_hub import hf_hub_download
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
|
6 |
+
# Location of the fastText language-identification weights on the Hugging Face
# Hub, and the local directory used as the download cache.
# Alternative source left as a note by the original author:
#   repo_id="cis-lmu/glotlid", filename="model.bin", cache_dir="./glotlid"
_HUB_MODEL = {
    "repo_id": "facebook/fasttext-language-identification",
    "filename": "model.bin",
    "cache_dir": "fasttext",
}

# Fetch the model file, then load it once at import time so every request
# shares the same model object.
model_path = hf_hub_download(**_HUB_MODEL)
model = fasttext.load_model(model_path)
|
14 |
+
|
15 |
+
|
16 |
+
def identify_languages(title):
    """Return the model's top language guesses for *title*.

    Args:
        title: The text to classify. It may span several lines (the UI text
            box is multi-line); ``None`` or blank text yields no guesses.

    Returns:
        A list of ``(language, confidence)`` tuples, at most five, in the
        order returned by ``model.predict``. ``language`` is the predicted
        label with the ``"__label__"`` marker removed.
    """
    # fastText's predict() handles one line at a time and raises ValueError
    # when the text contains "\n", so fold line breaks into spaces first.
    text = (title or "").replace("\n", " ")
    if not text.strip():
        # Nothing to classify; skip the model call.
        return []

    labels, confidences = model.predict(text, k=5)  # top 5 predictions
    return [
        (label.replace("__label__", ""), confidence)
        for label, confidence in zip(labels, confidences)
    ]
|
27 |
+
|
28 |
+
|
29 |
+
def plot_confidences(results):
    """Draw a horizontal bar chart of confidence per language.

    Args:
        results: A list of ``(language, confidence)`` tuples, as produced by
            ``identify_languages``.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure is the chart
        that was just drawn.
    """
    languages = [language for language, _ in results]
    confidences = [confidence for _, confidence in results]

    # pyplot keeps every figure alive until it is explicitly closed. Without
    # this, each call would leave one more figure open for the lifetime of
    # the process, so drop the leftovers from earlier calls before drawing.
    plt.close("all")

    plt.figure(figsize=(10, 6))
    sns.barplot(x=confidences, y=languages)
    plt.xlabel("Confidence")
    plt.ylabel("Language")
    plt.title("Language Identification Confidence")

    return plt
|
40 |
+
|
41 |
+
|
42 |
+
def identify_and_plot(title):
    """Classify *title* and chart the outcome.

    Returns a ``(results, plot)`` pair: the value of
    ``identify_languages(title)`` and the value of ``plot_confidences``
    called on those results.
    """
    ranked = identify_languages(title)
    return ranked, plot_confidences(ranked)
|
46 |
+
|
47 |
+
|
48 |
+
# Sample inputs; app.py passes this list to the interface as its `examples`.
# NOTE(review): the entries appear to be Shan text, Burmese text, and a mixed
# English/Shan greeting -- confirm with the author.
LID_EXAMPLES = [
    "ပိုၼ်းၵႅပ်ႈလိၵ်ႈလၢႆးတႆး ဢၼ်ၶူးပွင်သွၼ်လူင်လိၵ်ႈလၢႆးတႆး",
    "ယု၀တီဂျင်းဖောမယ်၊ ရှမ်းစာပေသမိုင်းနှင့်",
    "Hello World, မႂ်ႇသုင်ၶႃႈ",
]
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fasttext-wheel
|
2 |
+
huggingface_hub
|
3 |
+
matplotlib
|
4 |
+
seaborn
|