fazni's picture
added research file
948e385
raw
history blame contribute delete
No virus
2.15 kB
import torch
import pickle
import joblib
import numpy as np
import tensorflow as tf
from keras.utils import pad_sequences
from keras.preprocessing.text import Tokenizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# --- Hugging Face model setup ---------------------------------------------
# Checkpoint fine-tuned for career-path (job-role) classification.
model_name = "fazni/distilbert-base-uncased-career-path-prediction"

# Load the sequence-classification model and its matching tokenizer.
# NOTE(review): from_pretrained downloads/caches weights on first run —
# requires network access unless the checkpoint is already cached locally.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Class labels ordered to match the model's output logit indices.
# Presumably this ordering mirrors the fine-tuning label map — confirm
# against the checkpoint's config (id2label) if labels ever look swapped.
outcome_labels = [
    'Business Analyst',
    'Cyber Security',
    'Data Engineer',
    'Data Science',
    'DevOps',
    'Machine Learning Engineer',
    'Mobile App Developer',
    'Network Engineer',
    'Quality Assurance',
    'Software Engineer',
]
def model_prediction(text, model=model, tokenizer=tokenizer, labels=outcome_labels):
    """Predict the career-path label for a piece of text (e.g. a CV/resume).

    Args:
        text: Input string to classify.
        model: Sequence-classification model (defaults to the module-level
            Hugging Face model loaded above).
        tokenizer: Tokenizer matching ``model`` (defaults to the module-level
            tokenizer).
        labels: Ordered class names aligned with the model's output logit
            indices.

    Returns:
        The predicted label string from ``labels``.
    """
    # Tokenize; truncate so the input fits the model's 512-token limit.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only: no_grad avoids building the autograd graph,
    # saving memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    # Convert logits to class probabilities over the last dimension.
    probs = outputs.logits.softmax(dim=-1)
    # .item() turns the 0-d index tensor into a plain Python int.
    return labels[torch.argmax(probs).item()]