Obai33's picture
Update app.py
43629d4 verified
history blame
4.09 kB
# -*- coding: utf-8 -*-
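# Automatically generated by Colab.
# Original file is located at: (notebook URL not preserved in this copy)
#
# Dependencies to install before running this script:
#   pip install transformers
#   pip install tashaphyne
#   pip install gradio
#   pip install translate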
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from transformers import BertTokenizer
from transformers import AutoTokenizer
import random
from tashaphyne import normalize
import re
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, GRU
import tensorflow as tf
from transformers import AutoTokenizer
# Location of the Arabic poems corpus (a CSV file hosted on GitHub).
aurl = 'https://raw.githubusercontent.com/Obai33/NLP_PoemGenerationDatasets/main/arabicpoems.csv'

# Fetch and parse the CSV into a DataFrame; this performs a network request
# every time the script starts.
adf = pd.read_csv(filepath_or_buffer=aurl)
def normalize_text(text):
    """Return the normalized form of ``text``.

    As written, this is a pass-through: the argument is handed back
    unchanged.

    NOTE(review): ``normalize`` from ``tashaphyne`` is imported at the top of
    the file but is not called here -- confirm whether normalization steps
    were meant to be applied before returning.
    """
    normalized = text
    return normalized
# Word used to exclude poems from the sample: the Arabic word "Allah"
# (alef, lam, lam, heh). It is spelled with Unicode escapes so the literal
# cannot be corrupted by a wrong file encoding; a mis-encoded literal would
# silently match nothing and disable the filter below.
allah = normalize_text('\u0627\u0644\u0644\u0647')

# Keep only the poem text column (a Series from here on).
adf = adf['poem_text']

# Draw a fresh random seed on every run so a different sample of 100 poems is
# used each time; the value is only a seed, not an index into the data.
i = random.randint(0, len(adf))
adf = adf.sample(n=100, random_state=i)

# Normalize every sampled poem the same way the filter word was normalized.
adf = adf.apply(normalize_text)

# Drop poems containing the filter word. regex=False because the word is a
# plain substring, not a pattern.
adf = adf[~adf.str.contains(allah, regex=False)]
def remove_non_arabic_symbols(text):
    """Strip everything from ``text`` except Arabic-block characters and whitespace.

    Every maximal run of characters in the range U+0600..U+06FF or whitespace
    is kept, in order; all other characters are dropped and the surviving
    runs are concatenated with nothing in between.
    """
    return ''.join(re.findall(r'[\u0600-\u06FF\s]+', text))
# Clean the text: keep only Arabic-block characters and whitespace.
adf = adf.apply(remove_non_arabic_symbols)

# Tokenize the text with the AraBERT v2 tokenizer.
tokenizer = AutoTokenizer.from_pretrained("aubmindlab/bert-base-arabertv2")
tokens = tokenizer.tokenize(adf.tolist(), is_split_into_words=True)

# Build the n-gram training sequences: for each poem, every prefix of its
# token ids with at least two tokens becomes one sequence (all but the last
# id are the input, the last id is the label).
input_sequences = []
for line in adf:
    token_list = tokenizer.encode(line, add_special_tokens=True)
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        # Without this append the list stays empty and the 2-D slicing below
        # fails on the resulting empty array.
        input_sequences.append(n_gram_sequence)

# Left-pad (and, for longer prefixes, truncate) every sequence to a fixed
# length so they stack into one 2-D array.
max_sequence_len = 100
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

total_words = tokenizer.vocab_size

# Inputs are all columns but the last; labels are the last column.
xs, labels = input_sequences[:, :-1], input_sequences[:, -1]
# NOTE(review): this is a dense one-hot array of shape
# (number of sequences, vocabulary size) and can be very large -- confirm it
# is actually needed by this script.
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)
print('error not here')
import requests
# URL of the model
url = 'https://github.com/Obai33/NLP_PoemGenerationDatasets/raw/main/modelarab1.h5'
# Local file path to save the model
local_filename = 'modelarab1.h5'

# Download the model file. The timeout bounds connection/read stalls (not the
# total download time), and raise_for_status() stops the script on an HTTP
# error instead of saving an error page under a .h5 name.
response = requests.get(url, timeout=60)
response.raise_for_status()
with open(local_filename, 'wb') as f:
    f.write(response.content)

# Load the model from the file that was just written.
model = tf.keras.models.load_model(local_filename)
print('ok model loaded')
# Import the necessary library for translation
import translate
def translate_to_english(text):
    """Translate ``text`` from Arabic ("ar") to English ("en").

    A new ``translate.Translator`` is created on every call and its
    ``translate`` result is returned as-is.
    """
    return translate.Translator(from_lang="ar", to_lang="en").translate(text)
def generate_arabic_text(seed_text, next_words=50):
    """Extend ``seed_text`` one predicted token at a time and translate the result.

    Args:
        seed_text: Text the generation starts from.
        next_words: Number of prediction steps to run (default 50).

    Returns:
        A ``(arabic_text, english_text)`` tuple: the generated text with every
        " ##" removed, and the output of ``translate_to_english`` for that
        text.

    Relies on the module-level ``tokenizer``, ``model`` and
    ``max_sequence_len``.
    """
    poem = seed_text
    for _step in range(next_words):
        # Re-encode the whole text so far (no special tokens) and left-pad it
        # to the input length used when building the sequences
        # (max_sequence_len - 1).
        encoded = tokenizer.encode(poem, add_special_tokens=False)
        window = pad_sequences([encoded], maxlen=max_sequence_len - 1, padding='pre')

        # Take the highest-scoring id along the last axis of the model output
        # and append its decoded text, separated by a space.
        scores = model.predict(window)
        best_ids = np.argmax(scores, axis=-1)
        poem = poem + " " + tokenizer.decode(best_ids[0])

    # Glue pieces that were decoded with a leading "##" back onto the text
    # before them (presumably sub-word continuation markers -- confirm).
    joined = poem.replace(" ##", "")
    english = translate_to_english(joined)
    return joined, english
import gradio as gr
print('error not here')

# Gradio interface: one text input (the Arabic seed text) and two text
# outputs, matching the (Arabic, English) tuple returned by
# generate_arabic_text.
iface = gr.Interface(
    fn=generate_arabic_text,
    inputs="text",
    outputs=["text", "text"],
    title="Arabic Poetry Generation",
    description="Enter Arabic text to generate a small poem.",
)

# Run the interface
iface.launch(share = True)