from flask import Flask
import torch
from torch import nn
import re
import numpy as np
import pandas as pd
from collections import OrderedDict
# import requests
# from bs4 import BeautifulSoup
app = Flask(__name__)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Compare the device type, not the device object against a string (that comparison is always False)
if device.type == 'cuda':
    torch.cuda.set_device(device)
print(device)
def extract_text_from_link(url):
    # Helper for scraping a page's text; requires the 'requests' and 'bs4' imports above,
    # which are currently commented out -- uncomment them before calling this function
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    text = soup.get_text()
    return text
doc = """The word "deep" in "deep learning" refers to the number of layers through which the data is transformed. More precisely,
deep learning systems have a substantial credit assignment path (CAP) depth. The CAP is the chain of transformations from input to
output. CAPs describe potentially causal connections between input and output. For a feedforward neural network, the depth of the
CAPs is that of the network and is the number of hidden layers plus one (as the output layer is also parameterized). For recurrent
neural networks, in which a signal may propagate through a layer more than once, the CAP depth is potentially unlimited.[13] No
universally agreed-upon threshold of depth divides shallow learning from deep learning, but most researchers agree that deep
learning involves CAP depth higher than 2. CAP of depth 2 has been shown to be a universal approximator in the sense that it
can emulate any function.[14] Beyond that, more layers do not add to the function approximator ability of the network. Deep
models (CAP > 2) are able to extract better features than shallow models and hence, extra layers help in learning the features
effectively."""
class Text2Words:
def __init__(self, document):
self.text_all = re.findall(r'\b[A-Za-z]+\b', document)
self.text = list(set(self.text_all))
self.chars_all = ''.join(self.text)
self.chars = self.unique_chars(self.chars_all)
self.int2char = dict(enumerate(self.chars))
self.char2int = {char: ind for ind, char in self.int2char.items()}
self.maxlen = len(max(self.text, key=len))
self.update_text()
self.input_seq_char, self.target_seq_char = self.get_seq_char(self.text)
self.input_seq_index, self.target_seq_index = self.get_seq(self.char2int, self.input_seq_char, self.target_seq_char, len(self.text))
self.dict_size = len(self.char2int)
self.seq_len = self.maxlen - 1
self.batch_size = len(self.text)
self.input_seq = self.one_hot_encode(self.input_seq_index, self.dict_size, self.seq_len, self.batch_size)
def one_hot_encode(self, sequence, dict_size, seq_len, batch_size):
# Creating a multi-dimensional array of zeros with the desired output shape
features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
# Replacing the 0 at the relevant character index with a 1 to represent that character
for i in range(batch_size):
for u in range(seq_len):
features[i, u, sequence[i][u]] = 1
return features
def get_seq(self, char2int, input_seq_char, target_seq_char,n):
x=[]
y=[]
for i in range(n):
x.append([char2int[character] for character in input_seq_char[i]])
y.append([char2int[character] for character in target_seq_char[i]])
return x,y
def get_seq_char(self, text):
input_seq = []
target_seq = []
for i in range(len(text)):
# Remove last character for input sequence
input_seq.append(text[i][:-1])
# Remove first character for target sequence
target_seq.append(text[i][1:])
return input_seq, target_seq
def unique_chars(self, chars_all):
chars = []
for letter in chars_all:
if letter not in chars:
chars.append(letter)
# chars = sorted(chars)
if ' ' not in chars:
chars.append(' ')
return sorted(chars)
def update_text(self):
for i in range(len(self.text)):
while len(self.text[i])<self.maxlen:
self.text[i] += ' '
def description(self):
text = {}
for word in self.text:
char = word[0]
if char not in text:
text[char] = []
text[char].append(word.strip())
for k,v in (sorted(text.items())):
print(f'{k} : {sorted(v)}')
    def length_analysis(self):
text = {}
words = set(self.text_all)
for word in words:
n = len(word)
if n not in text:
text[n] = []
text[n].append(word.strip())
for k,v in (sorted(text.items())):
print(f'{k} : count = {len(v)} list = {sorted(v)}')
return None # text
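
# A minimal sketch (never called) of what Text2Words produces for a toy document; the variable
# names and the toy string are illustrative only
def _demo_text2words():
    toy = Text2Words("deep learning is deep")
    print(toy.text)                   # unique words, right-padded with spaces to maxlen
    print(toy.dict_size)              # number of distinct characters (space included)
    print(toy.input_seq.shape)        # (batch_size, seq_len, dict_size) one-hot inputs
    print(toy.target_seq_index[0])    # per-word target character indices, shifted by one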
def create_object(doc):
return Text2Words(doc)
def get_inputs(obj):
input_seq = torch.tensor(obj.input_seq, device=device)
target_seq_index = torch.tensor(obj.target_seq_index, device=device)
return input_seq, target_seq_index
class Model(nn.Module):
def __init__(self, input_size, output_size, hidden_dim, n_layers):
super(Model, self).__init__()
# Defining some parameters
self.hidden_dim = hidden_dim
self.n_layers = n_layers
#Defining the layers
# RNN Layer
self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
# Fully connected layer
self.fc = nn.Linear(hidden_dim, output_size)
def forward(self, x):
batch_size = x.size(0)
hidden = self.init_hidden(batch_size)
out, hidden = self.rnn(x, hidden)
out = out.contiguous().view(-1, self.hidden_dim)
out = self.fc(out)
return out, hidden
def init_hidden(self, batch_size):
# This method generates the first hidden state of zeros
torch.manual_seed(42)
hidden = torch.zeros((self.n_layers, batch_size, self.hidden_dim), device=device)
return hidden
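
# A minimal shape check (never called): with batch_first=True the RNN takes one-hot input of
# shape (batch, seq_len, dict_size) and forward() returns logits flattened to
# (batch * seq_len, dict_size), the layout nn.CrossEntropyLoss expects during training
def _demo_model_shapes():
    words = Text2Words(doc)
    net = Model(input_size=words.dict_size, output_size=words.dict_size,
                hidden_dim=2 * words.dict_size, n_layers=1).to(device)
    x = torch.tensor(words.input_seq, device=device)
    out, hidden = net(x)
    print(out.shape)     # (batch_size * seq_len, dict_size)
    print(hidden.shape)  # (n_layers, batch_size, hidden_dim)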
def create_model(obj):
model = Model(input_size=obj.dict_size, output_size=obj.dict_size, hidden_dim=2*obj.dict_size, n_layers=1)
model.to(device)
lr=0.01
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
return model, criterion, optimizer
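
# A hedged sketch of how this model is presumably trained (the actual training script is not part
# of this file; n_epochs and the logging interval are illustrative choices). forward() already
# flattens the logits to (batch * seq_len, dict_size), so the target indices are flattened to match
def _train_sketch(n_epochs=200):
    words = create_object(doc)
    model, criterion, optimizer = create_model(words)
    input_seq, target_seq_index = get_inputs(words)
    target = target_seq_index.view(-1).long()
    for epoch in range(1, n_epochs + 1):
        optimizer.zero_grad()
        output, hidden = model(input_seq)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if epoch % 50 == 0:
            print(f'Epoch {epoch}/{n_epochs}, loss = {loss.item():.4f}')
    return model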
# Module-level Text2Words instance; predict() and sample() below rely on this global 'obj',
# which is otherwise undefined in this file
obj = create_object(doc)
# This function takes in the model and character as arguments and returns the next character prediction and hidden state
def predict(model, character):
# One-hot encoding our input to fit into the model
# print(character)
character = np.array([[obj.char2int[c] for c in character]])
# print(character)
character = obj.one_hot_encode(character, obj.dict_size, character.shape[1], 1)
# print(character,character.shape)
    # The tensor is created directly on the target device
    character = torch.tensor(character, device=device)
out, hidden = model(character)
# print(out, hidden)
prob = nn.functional.softmax(out[-1], dim=0).data
# print(prob)
char_ind = torch.max(prob, dim=0)[1].item()
# print(sorted(prob, reverse=True))
return obj.int2char[char_ind], hidden
# This function takes a trained model and the starting characters, and keeps predicting the next
# character until a space is produced, returning the completed word (out_len is currently unused)
def sample(model, out_len, start='h'):
model.eval() # eval mode
chars = [ch for ch in start]
char = chars[-1]
chars = chars[:-1]
# Now pass in the previous characters and get a new one
while char != ' ':
chars.append(char)
char, h = predict(model, chars)
return ''.join(chars)
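
# Illustrative usage (never called): complete a word from a prefix with a trained model, e.g. the
# prefix 'ap' used in the commented-out route code below; assumes the prefix only contains
# characters present in the training vocabulary
def _demo_sample(model):
    completed = sample(model, obj.maxlen, start='ap')
    print(completed)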
def load_checkpoint(filepath):
checkpoint = torch.load(filepath)
# print(checkpoint['state_dict'])
model = checkpoint['model']
# print(model)
model.load_state_dict(checkpoint['state_dict'])
# print(model.parameters())
# for parameter in model.parameters():
# parameter.requires_grad = False
# print(parameter)
model.eval()
return model
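
# A hedged sketch of how 'checkpoint.pth' is presumably created: load_checkpoint() above expects a
# dict holding both the full 'model' object and its 'state_dict'
def _save_checkpoint(model, filepath='checkpoint.pth'):
    checkpoint = {'model': model, 'state_dict': model.state_dict()}
    torch.save(checkpoint, filepath)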
@app.route('/')
def home():
print(1)
return {'key':"Hello HuggingFace! Successfully deployed. "}
# model = load_checkpoint('checkpoint.pth')
# print(2)
# res = sample(model, obj.maxlen, 'ap')
# print(3)
    # return {'key':res}
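
# Local development entry point (an assumption: port 7860 follows the Hugging Face Spaces
# convention; the hosting platform may serve the 'app' object with its own runner instead)
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)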