File size: 4,620 Bytes
23e94c0
16bf80f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4604847
16bf80f
 
 
 
 
 
 
4604847
16bf80f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4604847
16bf80f
 
 
 
 
 
4604847
16bf80f
 
 
 
4604847
16bf80f
 
 
 
 
 
 
 
 
 
 
 
4604847
16bf80f
 
 
 
 
 
 
 
0ec3ad8
16bf80f
 
 
 
 
 
 
 
 
 
 
 
 
 
0ec3ad8
16bf80f
 
 
4604847
16bf80f
 
 
 
 
 
4604847
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import json
import logging
import re

# Set up logging
# All application events (model load, generation, data load) are appended to
# app.log at INFO level; timestamps come from the asctime field.
logging.basicConfig(
    filename="app.log",
    level=logging.INFO,
    format="%(asctime)s:%(levelname)s:%(message)s"
)

# Model and tokenizer loading function with caching
@st.cache_resource
def load_model():
    """
    Loads and caches the pre-trained language model and tokenizer.

    Cached via st.cache_resource so the (expensive) download/load happens once
    per Streamlit server process, not on every rerun.

    Returns:
        tuple: (model, tokenizer) on success, or (None, None) on failure
        (errors are logged and surfaced to the UI via st.error).
    """
    model_path = "Canstralian/pentest_ai"
    # 8-bit quantization (bitsandbytes) only works on CUDA devices; loading
    # with load_in_8bit=True on a CPU-only host raises at load time. Gate the
    # quantization flag on GPU availability, mirroring the dtype choice below.
    use_cuda = torch.cuda.is_available()
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto",
            load_in_4bit=False,
            load_in_8bit=use_cuda,
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        logging.info("Model and tokenizer loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logging.error(f"Error loading model: {e}")
        st.error("Failed to load model. Please check the logs.")
        return None, None

def sanitize_input(text):
    """
    Sanitizes and validates user input text to prevent injection or formatting issues.

    Args:
        text (str): User input text.

    Returns:
        str: The input with every character outside the whitelist
        (letters, digits, whitespace, ``. , ! ?``) removed, then stripped
        of surrounding whitespace.

    Raises:
        ValueError: If *text* is not a string.
    """
    if isinstance(text, str):
        # Whitelist approach: drop anything that is not alphanumeric,
        # whitespace, or basic punctuation.
        disallowed = re.compile(r"[^a-zA-Z0-9\s\.,!?]")
        return disallowed.sub("", text).strip()
    raise ValueError("Input must be a string.")

def generate_text(model, tokenizer, instruction):
    """
    Generates text based on the provided instruction using the loaded model.

    Args:
        model: The language model.
        tokenizer: Tokenizer for encoding/decoding.
        instruction (str): Instruction text for the model.

    Returns:
        str: Generated text response from the model, or a fixed error
        message string if generation fails (the error is logged).
    """
    try:
        # Validate and sanitize instruction input
        instruction = sanitize_input(instruction)
        # Use the device the model actually lives on instead of hard-coding
        # 'cuda' — the original crashed on CPU-only hosts (and device_map="auto"
        # may have placed the model elsewhere anyway).
        device = next(model.parameters()).device
        tokens = tokenizer.encode(instruction, return_tensors='pt').to(device)
        generated_tokens = model.generate(
            tokens,
            max_length=1024,
            # temperature/top_p/top_k are ignored by greedy decoding; enable
            # sampling so these parameters actually take effect.
            do_sample=True,
            top_p=1.0,
            temperature=0.5,
            top_k=50
        )
        generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
        logging.info("Text generated successfully.")
        return generated_text
    except Exception as e:
        logging.error(f"Error generating text: {e}")
        return "Error in text generation."

@st.cache_data
def load_json_data():
    """
    Loads JSON data, simulating the loading process with a sample list.

    Cached with st.cache_data so the sample records are built only once
    per session.

    Returns:
        list: A list of dictionaries with sample user data; an empty list
        if anything goes wrong (the error is logged).
    """
    try:
        # Static sample records standing in for a real JSON source.
        sample_users = [
            {
                "name": "Raja Clarke",
                "email": "consectetuer@yahoo.edu",
                "country": "Chile",
                "company": "Urna Nunc Consulting",
            },
            {
                "name": "Melissa Hobbs",
                "email": "massa.non@hotmail.couk",
                "country": "France",
                "company": "Gravida Mauris Limited",
            },
            {
                "name": "John Doe",
                "email": "john.doe@example.com",
                "country": "USA",
                "company": "Example Corp",
            },
            {
                "name": "Jane Smith",
                "email": "jane.smith@example.org",
                "country": "Canada",
                "company": "Innovative Solutions Inc",
            },
        ]
        logging.info("User JSON data loaded successfully.")
        return sample_users
    except Exception as e:
        logging.error(f"Error loading JSON data: {e}")
        return []

# Streamlit App entry: title, model load, instruction form, and user-data table.
st.title("Penetration Testing AI Assistant")

# Load the (cached) model and tokenizer once per server process.
model, tokenizer = load_model()

# Prompt the user for an instruction to feed the model.
instruction = st.text_input("Enter an instruction for the model:")

# Run generation as soon as a non-empty instruction is entered.
if instruction:
    try:
        generated_text = generate_text(model, tokenizer, instruction)
        st.subheader("Generated Text:")
        st.write(generated_text)
    except ValueError as ve:
        st.error(f"Invalid input: {ve}")
    except Exception as e:
        logging.error(f"Error during text generation: {e}")
        st.error("An error occurred. Please try again.")

# Display JSON user data
st.subheader("User Data (from JSON)")
user_data = load_json_data()

# Render each record's fields in a fixed order, separated by a rule.
for record in user_data:
    for label, key in (("Name", "name"), ("Email", "email"),
                       ("Country", "country"), ("Company", "company")):
        st.write(f"**{label}:** {record[key]}")
    st.write("---")