Spaces:

cdactvm
/

Hindi_ASR

Sleeping

App Files Files Community

cdactvm committed on Oct 17, 2024

Commit

bdfde60

verified ·

1 Parent(s): a8e84bc

Update convert2list.py

Browse files

Files changed (1) hide show

convert2list.py +89 -108

convert2list.py CHANGED Viewed

@@ -1,108 +1,89 @@
-#!/usr/bin/env python
-# coding: utf-8
-# In[ ]:
-# import nbimporter
-import nbimporter
-from Text2List import text_to_list
-def convert_to_list(text, text_list):
-    matched_words = []
-    unmatched_text = ''  # To accumulate unmatched characters
-    # Sort text_list by length in descending order to prioritize longest matches first
-    text_list_sorted = sorted(text_list, key=len, reverse=True)
-    while text:
-        matched = False
-        for word in text_list_sorted:
-            if text.startswith(word):
-                # Add any accumulated unmatched text before appending the matched word
-                if unmatched_text:
-                    matched_words.append(unmatched_text)
-                    unmatched_text = ''  # Reset unmatched text accumulator
-                matched_words.append(word)
-                text = text[len(word):]  # Remove the matched part from text
-                matched = True
-                break
-        if not matched:
-            # Accumulate unmatched characters
-            unmatched_text += text[0]
-            text = text[1:]
-    # If there's any remaining unmatched text, add it to the result
-    if unmatched_text:
-        matched_words.append(unmatched_text)
-    # Join matched words and unmatched text with a space
-    result = ' '.join(matched_words)
-    return result
-# text = "जीरोएकदोतीनचारपांचछहसातआठनौदसजीरोएकदोतीनचारपांच"
-if __name__=="__main__":
-    converted=convert_to_list(text, text_to_list())
-    print(converted)
-# In[ ]:
-# # import nbimporter
-# import nbimporter
-# from Text2List import text_to_list
-# def convert_to_list(text, text_list):
-#     matched_words = []
-#     unmatched_text = ''  # To accumulate unmatched characters
-#     # Sort text_list by length in descending order to prioritize longest matches first
-#     text_list_sorted = sorted(text_list, key=len, reverse=True)
-#     while text:
-#         matched = False
-#         for word in text_list_sorted:
-#             if word in text:
-#                 # Add any accumulated unmatched text before appending the matched word
-#                 if unmatched_text:
-#                     matched_words.append(unmatched_text)
-#                     unmatched_text = ''  # Reset unmatched text accumulator
-#                 matched_words.append(word)
-#                 text = text[len(word):]  # Remove the matched part from text
-#                 matched = True
-#                 break
-#         if not matched:
-#             # Accumulate unmatched characters
-#             unmatched_text += text[0]
-#             text = text[1:]
-#     # If there's any remaining unmatched text, add it to the result
-#     if unmatched_text:
-#         matched_words.append(unmatched_text)
-#     # Join matched words and unmatched text with a space
-#     result = ' '.join(matched_words)
-#     return result
-# text = "जीरोएकदोतीनचार"
-# if __name__=="__main__":
-#     converted=convert_to_list(text, text_to_list())
-#     print(converted)
-# In[ ]:
-get_ipython().system('git clone https://huggingface.co/StephennFernandes/wav2vec2-XLS-R-300m-konkani')
-# In[ ]:

+# import nbimporter
+import nbimporter
+from Text2List import text_to_list
+def convert_to_list(text, text_list):
+    """Split run-together text into known words separated by single spaces.
+
+    The text is scanned left to right. At each position the longest entry of
+    ``text_list`` that the remaining text starts with is taken as the next
+    word. Characters that start no known word are collected into one chunk,
+    which is emitted just before the next matched word (or at the end).
+
+    Args:
+        text: String to segment.
+        text_list: Iterable of known words. Empty strings are ignored: an
+            empty string is a prefix of any text and consumes nothing, so
+            treating it as a match would keep the scan from ever advancing.
+
+    Returns:
+        The matched words and unmatched chunks, in order, joined by spaces.
+    """
+    # Longest first so a long word wins over a shorter word that is its prefix.
+    candidates = sorted((w for w in text_list if w), key=len, reverse=True)
+    pieces = []
+    pending = []  # unmatched characters seen since the last matched word
+    pos = 0  # scan by index instead of re-slicing the text on every step
+    while pos < len(text):
+        word = next((w for w in candidates if text.startswith(w, pos)), None)
+        if word is None:
+            pending.append(text[pos])
+            pos += 1
+            continue
+        if pending:
+            # Emit the unmatched run before the word that ended it.
+            pieces.append(''.join(pending))
+            pending = []
+        pieces.append(word)
+        pos += len(word)
+    if pending:
+        pieces.append(''.join(pending))
+    return ' '.join(pieces)
+if __name__=="__main__":
+    # Demo run. The sample input must be assigned here: with the assignment
+    # commented out, `text` is undefined and the call below raises NameError.
+    text = "जीरोएकदोतीनचारपांचछहसातआठनौदसजीरोएकदोतीनचारपांच"
+    converted=convert_to_list(text, text_to_list())
+    print(converted)
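+# In[ ]:
+# NOTE: superseded draft of convert_to_list, kept commented out for reference.
+# It tests `word in text` (a match anywhere in the remaining text) but then
+# strips len(word) characters from the front, so it can drop unrelated text.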
+# # import nbimporter
+# import nbimporter
+# from Text2List import text_to_list
+# def convert_to_list(text, text_list):
+#     matched_words = []
+#     unmatched_text = ''  # To accumulate unmatched characters
+#     # Sort text_list by length in descending order to prioritize longest matches first
+#     text_list_sorted = sorted(text_list, key=len, reverse=True)
+#     while text:
+#         matched = False
+#         for word in text_list_sorted:
+#             if word in text:
+#                 # Add any accumulated unmatched text before appending the matched word
+#                 if unmatched_text:
+#                     matched_words.append(unmatched_text)
+#                     unmatched_text = ''  # Reset unmatched text accumulator
+#                 matched_words.append(word)
+#                 text = text[len(word):]  # Remove the matched part from text
+#                 matched = True
+#                 break
+#         if not matched:
+#             # Accumulate unmatched characters
+#             unmatched_text += text[0]
+#             text = text[1:]
+#     # If there's any remaining unmatched text, add it to the result
+#     if unmatched_text:
+#         matched_words.append(unmatched_text)
+#     # Join matched words and unmatched text with a space
+#     result = ' '.join(matched_words)
+#     return result
+# text = "जीरोएकदोतीनचार"
+# if __name__=="__main__":
+#     converted=convert_to_list(text, text_to_list())
+#     print(converted)