Spaces:

cdactvm
/

ENGLISH_ASR

Sleeping

File size: 10,492 Bytes

3b58a97

#!/usr/bin/env python
# coding: utf-8

# In[1]:


# # # Function to convert English number words in text to their numerical representation
# from isNumber import is_number

# def text_to_int (textnum, numwords={}):
#     units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
#             'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
#             'sixteen', 'seventeen', 'eighteen', 'nineteen']
#     tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
#     scales = ['hundred', 'thousand', 'lac','million', 'billion', 'trillion']
#     ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
#     ordinal_endings = [('ieth', 'y'), ('th', '')]

#     if not numwords:
#         numwords['and'] = (1, 0)
#         for idx, word in enumerate(units): numwords[word] = (1, idx)
#         for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
#         for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)

#     textnum = textnum.replace('-', ' ')

#     current = result = 0
#     curstring = ''
#     onnumber = False
#     lastunit = False
#     lastscale = False

#     def is_numword(x):
#         if is_number(x):
#             return True
#         if word in numwords:
#             return True
#         return False

#     def from_numword(x):
#         if is_number(x):
#             scale = 0
#             increment = int(x.replace(',', ''))
#             return scale, increment
#         return numwords[x]

#     for word in textnum.split():
#         if word in ordinal_words:
#             scale, increment = (1, ordinal_words[word])
#             current = current * scale + increment
#             if scale > 100:
#                 result += current
#                 current = 0
#             onnumber = True
#             lastunit = False
#             lastscale = False
#         else:
#             for ending, replacement in ordinal_endings:
#                 if word.endswith(ending):
#                     word = "%s%s" % (word[:-len(ending)], replacement)

#             if (not is_numword(word)) or (word == 'and' and not lastscale):
#                 if onnumber:
#                     # Flush the current number we are building
#                     curstring += repr(result + current) + " "
#                 curstring += word + " "
#                 result = current = 0
#                 onnumber = False
#                 lastunit = False
#                 lastscale = False
#             else:
#                 scale, increment = from_numword(word)
#                 onnumber = True

#                 if lastunit and (word not in scales):                                                                                                                                                                                                                                         
#                     # Assume this is part of a string of individual numbers to                                                                                                                                                                                                                
#                     # be flushed, such as a zipcode "one two three four five"                                                                                                                                                                                                                 
#                     curstring += repr(result + current)                                                                                                                                                                                                                                       
#                     result = current = 0                                                                                                                                                                                                                                                      

#                 if scale > 1:                                                                                                                                                                                                                                                                 
#                     current = max(1, current)                                                                                                                                                                                                                                                 

#                 current = current * scale + increment                                                                                                                                                                                                                                         
#                 if scale > 100:                                                                                                                                                                                                                                                               
#                     result += current                                                                                                                                                                                                                                                         
#                     current = 0                                                                                                                                                                                                                                                               

#                 lastscale = False                                                                                                                                                                                                              
#                 lastunit = False                                                                                                                                                
#                 if word in scales:                                                                                                                                                                                                             
#                     lastscale = True                                                                                                                                                                                                         
#                 elif word in units:                                                                                                                                                                                                             
#                     lastunit = True

#     if onnumber:
#         curstring += repr(result + current)

#     return curstring


# In[5]:


import nbimporter
from isNumber import is_number  # Remove or replace this if unnecessary

def text_to_int(textnum, numwords=None):
    """Replace spelled-out English numbers in *textnum* with digits.

    The text is split on whitespace (hyphens are treated as spaces). Runs of
    number words are folded into a single integer; every other word is copied
    to the output unchanged. Consecutive unit words ("one two three") are
    emitted as separate numbers, which suits digit strings such as zip codes.
    Ordinals ("first", "fourth", "twentieth") are emitted as their cardinal
    digits.

    Examples (expected output shown after ``->``)::

        "two hundred thousand"          -> "200000"
        "five lakh"                     -> "500000"
        "one hundred and twenty three"  -> "123"
        "one two three"                 -> "1 2 3"

    Args:
        textnum: Input text. Matching is case-sensitive; number words must be
            lower-case to be recognised.
        numwords: Optional mapping ``word -> (scale, increment)``. When omitted
            or empty, the built-in English table is used; an empty dict passed
            by the caller is populated in place with that table.

    Returns:
        The converted text, with tokens separated by single spaces.
    """
    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
             'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
             'sixteen', 'seventeen', 'eighteen', 'nineteen']
    # The two leading '' entries are placeholders so that index * 10 gives the value.
    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
    # Magnitudes are spelled out explicitly: 'lakh' (10**5) does not fit a
    # "10 ** (3 * index)" rule, and deriving exponents from list positions
    # would shift every scale that follows it.
    scales = {
        'hundred': 10 ** 2,
        'thousand': 10 ** 3,
        'lakh': 10 ** 5,
        'million': 10 ** 6,
        'billion': 10 ** 9,
        'trillion': 10 ** 12,
    }
    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    # A None default avoids sharing one mutable dict between unrelated calls.
    if numwords is None:
        numwords = {}
    if not numwords:
        numwords['and'] = (1, 0)  # lets "one hundred and twenty" read as 120
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            if word:  # never register the '' placeholders as number words
                numwords[word] = (1, idx * 10)
        for word, magnitude in scales.items():
            numwords[word] = (magnitude, 0)

    def resolve(token):
        """Return ``(scale, increment)`` for a number token, or ``None``."""
        if token in numwords:
            return numwords[token]
        # is_number comes from the project's isNumber module (file-level
        # import); its exact acceptance rules are not visible here.
        if is_number(token):
            try:
                # Scale 0 makes a literal number replace the running value.
                return 0, int(token.replace(',', ''))
            except ValueError:
                # Accepted by is_number but not an integer (presumably
                # something like "3.5"): treat it as ordinary text.
                return None
        return None

    pieces = []
    current = result = 0
    onnumber = lastunit = lastscale = False

    for word in textnum.replace('-', ' ').split():
        if word in ordinal_words:
            current += ordinal_words[word]
            onnumber = True
            lastunit = lastscale = False
            continue

        # Regular ordinals: "fourth" -> "four", "twentieth" -> "twenty".
        # The stem is only adopted when it really is a number, so ordinary
        # words ending in "th" ("month", "with") are left untouched.
        for ending, replacement in ordinal_endings:
            if word.endswith(ending):
                candidate = word[:-len(ending)] + replacement
                if candidate and resolve(candidate) is not None:
                    word = candidate
                    break

        entry = resolve(word)
        # "and" only counts as part of a number right after a scale word.
        if entry is None or (word == 'and' and not lastscale):
            if onnumber:
                pieces.append(str(result + current))  # flush the pending number
            pieces.append(word)
            result = current = 0
            onnumber = lastunit = lastscale = False
            continue

        scale, increment = entry
        onnumber = True

        if lastunit and word not in scales:
            # A unit followed by a non-scale word starts a new number
            # ("one two three" -> "1 2 3").
            pieces.append(str(result + current))
            result = current = 0

        if scale > 1:
            current = max(1, current)  # a bare "hundred" means 1 * 100

        current = current * scale + increment

        # Only scales above a hundred close a group. "hundred" must stay in
        # `current` so a following scale multiplies it:
        # "two hundred thousand" -> 200 * 1000.
        if scale > 100:
            result += current
            current = 0

        lastscale = word in scales
        lastunit = word in units

    if onnumber:
        pieces.append(str(result + current))

    return ' '.join(pieces)


# In[ ]: