from googletrans import Translator
import spacy
import gradio as gr

spacy.cli.download("en_core_web_sm")
nlp = spacy.load("en_core_web_sm")
translator = Translator()

def Sentencechunker(sentence):
    # Build progressively longer chunks: first word, first two words, and so on
    Sentchunks = sentence.split(" ")
    chunks = []
    for i in range(len(Sentchunks)):
        chunks.append(" ".join(Sentchunks[:i+1]))
    return " | ".join(chunks)

def ReverseSentenceChunker(sentence):
    # Same as Sentencechunker, but applied to the sentence with its word order reversed
    reversed_sentence = " ".join(reversed(sentence.split()))
    chunks = Sentencechunker(reversed_sentence)
    return chunks

def three_words_chunk(sentence):
    # Sliding window of three consecutive words
    words = sentence.split()
    chunks = [words[i:i+3] for i in range(len(words)-2)]
    chunks = [" ".join(chunk) for chunk in chunks]
    return " | ".join(chunks)

def keep_nouns_verbs(sentence):
    # Keep only nouns, verbs and punctuation, dropping everything else
    doc = nlp(sentence)
    nouns_verbs = []
    for token in doc:
        if token.pos_ in ["NOUN", "VERB", "PUNCT"]:
            nouns_verbs.append(token.text)
    return " ".join(nouns_verbs)

def unique_word_count(text="", state=None):
    # Count word frequencies, optionally accumulating into an existing state dict
    if state is None:
        state = {}
    words = text.split()
    word_counts = state
    for word in words:
        if word in word_counts:
            word_counts[word] += 1
        else:
            word_counts[word] = 1
    sorted_word_counts = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
    return sorted_word_counts

"""
sentence = "Please help me create a sentence chunker"
sentencechunks = Sentencechunker(sentence)
reversed_chunks = ReverseSentenceChunker(sentence)
TWchunks = three_words_chunk(sentence)
nouns_verbs = keep_nouns_verbs(sentence)
"""

# Target language for translation (googletrans language codes)
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")

"""
def VarTrans(text, langdest):
    translated = translator.translate(text, dest=langdest)
    SCtranslated = translator.translate(sentencechunks, dest=langdest)
    RCtranslated = translator.translate(reversed_chunks, dest=langdest)
    TWCtranslated = translator.translate(TWchunks, dest=langdest)
    return translated, SCtranslated, RCtranslated, TWCtranslated
"""

ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks"], label="Choose Chunk Type")

def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    FinalOutput = ""
    TransFinalOutput = ""
    if Chunkmode == "Chunks":
        FinalOutput += Sentencechunker(Text)
    if Chunkmode == "Reverse":
        FinalOutput += ReverseSentenceChunker(Text)
    if Chunkmode == "Three Word Chunks":
        FinalOutput += three_words_chunk(Text)

    if Translate:
        TransFinalOutput = FinalOutput
        translated = translator.translate(TransFinalOutput, dest=langdest)
        FinalOutput += "\n" + translated.text
    return FinalOutput

"""
print(translated.text)
print(sentencechunks)
print(SCtranslated.text)
print(reversed_chunks)
print(RCtranslated.text)
print(TWchunks)
print(TWCtranslated.text)
print(nouns_verbs)
"""
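# A quick illustrative check of the chunker helpers above (safe to delete).
# demo_sentence is just a throwaway sample input; the trailing comments show
# what each call actually returns for it.
demo_sentence = "Please help me"
print(Sentencechunker(demo_sentence))         # Please | Please help | Please help me
print(ReverseSentenceChunker(demo_sentence))  # me | me help | me help Please
print(three_words_chunk(demo_sentence))       # Please help me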
def Wordchunker(word):
    # Build progressively longer prefixes of a single word
    chunks = []
    for i in range(len(word)):
        chunks.append(word[:i+1])
    return chunks

word = "please"
wordchunks = Wordchunker(word)
print("\n")
print(wordchunks)

#random_chunk_display(TWCtranslated.text)
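# The to-do note in the UI below mentions an "Arrows app json creator for easy
# knowledge graphing and spacy POS graph". This is only a rough sketch of that
# idea using the already-loaded spaCy pipeline: the function name and the
# nodes/relationships JSON shape are assumptions, not the actual arrows.app schema.
def pos_graph_json(sentence):
    doc = nlp(sentence)
    # One node per token, labelled with its part of speech
    nodes = [{"id": str(token.i), "caption": token.text, "labels": [token.pos_]} for token in doc]
    # One edge per dependency relation (the root points to itself and is skipped)
    relationships = [
        {"fromId": str(token.i), "toId": str(token.head.i), "type": token.dep_}
        for token in doc if token.head.i != token.i
    ]
    return {"nodes": nodes, "relationships": relationships}
# e.g. pos_graph_json("Please help me") -> token nodes plus dependency edges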
with gr.Blocks() as lliface:
    gr.HTML("""Still Under Construction |
    To do: Arrows app JSON creator for easy knowledge graphing and a spaCy POS graph?
    https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization --
    maybe duplicate these, make them private and then load them into this space? --> Whisper space for YouTube, CLIP Interrogator, load here along with all my random functions, esp. text to HTML""")
    gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
    gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Nouns and Verbs only (plus punctuation)")
    gr.HTML("Add a CodePen page here")
    gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Word Counter")

lliface.launch()