""" Copyright 2024 X_G85 Model Integration Utils ------------------------- """ # Author: Adam-Al-Rahman import numpy as np import pandas as pd import tensorflow as tf from tensorflow.keras.preprocessing.text import tokenizer_from_json from tensorflow.keras.preprocessing.sequence import pad_sequences def tokenizer(arch: str, tokenizer_json: str, text: str, max_length=300): """ ::param:: arch: type of model `Bstm` or `Bert` """ tokenized_data = None if arch == "Lstm": # Load the tokenizer from the JSON file with open(tokenizer_json) as file: data = file.read() tokenizer = tokenizer_from_json(data) # Use the tokenizer to transform test data tokenized_text = tokenizer.texts_to_sequences(text) tokenized_data = pad_sequences(tokenized_text, maxlen=max_length) tokenized_data = tokenized_data.astype(np.float32) return tokenized_data def modelx( arch: str, model_path: str, text: str, tokenizer_json: str = "", batch_size=32, max_length=300, ): model_result = None if tokenizer_json: text = tokenizer(arch, tokenizer_json, text, max_length) else: text = pd.Series(text) if arch == "Lstm": model = tf.keras.models.load_model(model_path) model_result = model.predict(text, batch_size=batch_size) model_result = tf.squeeze(tf.round(model_result)) if model_result == 1.0: model_result = "REAL NEWS" elif model_result == 0.0: model_result = "FAKE NEWS" return model_result