from googletrans import Translator
import spacy
import gradio as gr

spacy.cli.download("en_core_web_sm")
nlp = spacy.load('en_core_web_sm')
translator = Translator()
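
# Progressive chunker: rebuilds the sentence one word at a time,
# e.g. "a b c" -> "a | a b | a b c", with chunks separated by " | ".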
def Sentencechunker(sentence):
    Sentchunks = sentence.split(" ")
    chunks = []
    for i in range(len(Sentchunks)):
        chunks.append(" ".join(Sentchunks[:i+1]))
    return " | ".join(chunks)
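
# Same progressive chunking as above, applied after reversing the word order of the sentence.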
def ReverseSentenceChunker(sentence):
    reversed_sentence = " ".join(reversed(sentence.split()))
    chunks = Sentencechunker(reversed_sentence)
    return chunks
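
# Sliding window of three consecutive words, e.g. "a b c d" -> "a b c | b c d".
# Inputs shorter than three words yield an empty string.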
def three_words_chunk(sentence):
    words = sentence.split()
    chunks = [words[i:i+3] for i in range(len(words)-2)]
    chunks = [" ".join(chunk) for chunk in chunks]
    return " | ".join(chunks)
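
# Uses spaCy's POS tags to keep only noun, verb and punctuation tokens.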
def keep_nouns_verbs(sentence):
    doc = nlp(sentence)
    nouns_verbs = []
    for token in doc:
        if token.pos_ in ['NOUN', 'VERB', 'PUNCT']:
            nouns_verbs.append(token.text)
    return " ".join(nouns_verbs)
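
# Counts word frequencies in the input text and returns (word, count) pairs sorted by
# descending count; the optional state dict lets counts accumulate across calls.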
def unique_word_count(text="", state=None):
    if state is None:
        state = {}
    words = text.split()
    word_counts = state
    for word in words:
        if word in word_counts:
            word_counts[word] += 1
        else:
            word_counts[word] = 1
    sorted_word_counts = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
    return sorted_word_counts

"""
sentence = "Please help me create a sentence chunker"
sentencechunks = Sentencechunker(sentence)
reversed_chunks = ReverseSentenceChunker(sentence)
TWchunks = three_words_chunk(sentence)
nouns_verbs = keep_nouns_verbs(sentence)
"""
# Target-language dropdown for translation (defaults to German)
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")

"""
def VarTrans(text, langdest):
    translated = translator.translate(text, dest=langdest)
    SCtranslated = translator.translate(sentencechunks, dest=langdest)
    RCtranslated = translator.translate(reversed_chunks, dest=langdest)
    TWCtranslated = translator.translate(TWchunks, dest=langdest)
    return translated, SCtranslated, RCtranslated, TWCtranslated
"""
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks"], label="Choose Chunk Type")
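
# Entry point for the chunking interface: applies the strategy chosen in ChunkModeDrop and,
# if Translate is checked, appends a googletrans translation of the chunked output.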
def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    FinalOutput = ""
    TransFinalOutput = ""
    if Chunkmode == "Chunks":
        FinalOutput += Sentencechunker(Text)
    if Chunkmode == "Reverse":
        FinalOutput += ReverseSentenceChunker(Text)
    if Chunkmode == "Three Word Chunks":
        FinalOutput += three_words_chunk(Text)
    if Translate:
        TransFinalOutput = FinalOutput
        translated = translator.translate(TransFinalOutput, dest=langdest)
        FinalOutput += "\n" + translated.text
    return FinalOutput

"""
print(translated.text)
print(sentencechunks)
print(SCtranslated.text)
print(reversed_chunks)
print(RCtranslated.text)
print(TWchunks)
print(TWCtranslated.text)
print(nouns_verbs)
"""
# Letter-by-letter chunker for a single word, e.g. "please" -> ["p", "pl", "ple", ...].
def Wordchunker(word):
    chunks = []
    for i in range(len(word)):
        chunks.append(word[:i+1])
    return chunks
word = "please"
wordchunks = Wordchunker(word)
print("\n")
print(wordchunks)

#random_chunk_display(TWCtranslated.text)
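
# Gradio UI: the chunking, noun/verb-filter and word-count helpers are each exposed
# as their own Interface inside a single Blocks page.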
with gr.Blocks() as lliface:
    gr.HTML("<p> Still under construction </p> <p> Arrows app JSON creator for easy knowledge graphing and spaCy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization, Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
    gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
    gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Nouns and Verbs only (plus punctuation)")
    gr.HTML("Add a CodePen page here")
    gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Word counter")

lliface.launch()