NorHsangPha committed on
Commit
949c5da
1 Parent(s): 5aa6e12

Initial: initial commit

Browse files
Files changed (3) hide show
  1. app.py +17 -0
  2. lid.py +52 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from lid import identify_and_plot, LID_EXAMPLES
3
+
4
# Gradio interface for the demo: one text box in; a text box with the
# predicted languages and a confidence plot out. Flagging is turned off.
lid_indentify = gr.Interface(
    title="Language Identification Demo",
    description="Identify the language of input text and view confidence levels.",
    fn=identify_and_plot,
    inputs=gr.Textbox(lines=2, label="Input text"),
    outputs=[
        gr.Textbox(label="Language"),
        gr.Plot(label="Confidence Plot"),
    ],
    examples=LID_EXAMPLES,
    allow_flagging="never",
)
13
+
14
# Host the interface inside a Blocks container and start the app.
demo = gr.Blocks()
with demo:
    lid_indentify.render()

demo.launch()
lid.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fasttext
2
+ from huggingface_hub import hf_hub_download
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
# Fetch the fastText language-identification checkpoint from the Hugging Face
# Hub (cached under "fasttext") and load it once at import time.
# Alternative checkpoint, left disabled:
#   repo_id="cis-lmu/glotlid", filename="model.bin", cache_dir="./glotlid"
model_path = hf_hub_download(
    repo_id="facebook/fasttext-language-identification",
    filename="model.bin",
    cache_dir="fasttext",
)
model = fasttext.load_model(model_path)
14
+
15
+
16
def identify_languages(title):
    """Return the five most likely languages for ``title``.

    Args:
        title: Text to classify. It may span several lines; line breaks are
            replaced with spaces before prediction.

    Returns:
        A list of ``(language, confidence)`` tuples in the order the model
        returns them. The ``__label__`` marker is removed from each label and
        each confidence is a plain ``float``.
    """
    # fastText's predict() handles one line at a time and raises ValueError
    # when the text contains a newline, so multi-line input is flattened first.
    single_line = " ".join(title.splitlines())

    labels, confidences = model.predict(single_line, k=5)  # top 5 predictions

    return [
        (label.replace("__label__", ""), float(confidence))
        for label, confidence in zip(labels, confidences)
    ]
27
+
28
+
29
def plot_confidences(results):
    """Draw a horizontal bar chart of confidence per language.

    Args:
        results: Sequence of ``(language, confidence)`` pairs.

    Returns:
        A matplotlib ``Figure`` containing the bar chart.
    """
    # Local import: only this function needs the Figure class.
    from matplotlib.figure import Figure

    languages = [result[0] for result in results]
    confidences = [result[1] for result in results]

    # Build a standalone Figure instead of going through pyplot. pyplot keeps
    # every figure it creates registered until it is explicitly closed, which
    # leaks memory in a long-running app, and its implicit "current figure" is
    # global state shared by all callers.
    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    sns.barplot(x=confidences, y=languages, ax=ax)
    ax.set_xlabel("Confidence")
    ax.set_ylabel("Language")
    ax.set_title("Language Identification Confidence")

    return fig
40
+
41
+
42
def identify_and_plot(title):
    """Identify the languages of ``title`` and plot their confidences.

    Args:
        title: Text to classify.

    Returns:
        A ``(results, plot)`` tuple: the value returned by
        ``identify_languages(title)`` and the value returned by
        ``plot_confidences`` for those results.
    """
    ranked = identify_languages(title)
    return ranked, plot_confidences(ranked)
46
+
47
+
48
# Sample inputs offered as clickable examples in the demo UI.
# NOTE(review): the entries appear to be Shan, Burmese, and mixed English/Shan
# text; confirm with the author.
LID_EXAMPLES = [
    "ပိုၼ်းၵႅပ်ႈလိၵ်ႈလၢႆးတႆး ဢၼ်ၶူးပွင်သွၼ်လူင်လိၵ်ႈလၢႆးတႆး",
    "ယု၀တီဂျင်းဖောမယ်၊ ရှမ်းစာပေသမိုင်းနှင့်",
    "Hello World, မႂ်ႇသုင်ၶႃႈ",
]
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fasttext-wheel
2
+ huggingface_hub
3
+ matplotlib
4
+ seaborn