Canstralian committed on
Commit
16bf80f
·
verified ·
1 Parent(s): eaa916b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -165
app.py CHANGED
@@ -1,176 +1,141 @@
1
  import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
- from wordcloud import WordCloud
7
- from dotenv import load_dotenv
8
- import os
9
-
10
- # Load environment variables from .env file
11
- load_dotenv()
12
- access_token = os.getenv("HUGGINGFACE_ACCESS_TOKEN")
13
-
14
- # Page configuration
15
- st.set_page_config(page_title="ReconNinja Wordlists", page_icon="💬", layout="wide")
16
-
17
# Sidebar for navigation
def display_sidebar():
    """Render the navigation radio in the sidebar and return the chosen page."""
    st.sidebar.title("Navigation")
    pages = ["Wordlist Generator", "Statistics", "Security Analysis"]
    return st.sidebar.radio("Go to", pages)
23
-
24
# Header section
def display_header():
    """Render the page title, subtitle, and introductory description."""
    st.title("💬 ReconNinja Wordlists")
    st.subheader("Tailored wordlists for efficient penetration testing")
    intro = """
    This application generates customized wordlists for use in network reconnaissance and penetration testing.
    Adjust the parameters to generate wordlists suited for your specific testing scenario.
    """
    st.markdown(intro)
32
-
33
# Sidebar for user input
def get_user_inputs():
    """Collect wordlist generation parameters from the sidebar widgets.

    Returns:
        tuple: (wordlist_size, min_length, max_length,
                include_special_chars, include_numbers)
    """
    st.sidebar.header("Customize Your Wordlist")
    st.sidebar.markdown("""
    Adjust the following parameters to create wordlists optimized for your penetration testing tasks.
    """)
    size = st.sidebar.slider("Wordlist Size", min_value=50, max_value=10000, value=1000, step=50)
    shortest = st.sidebar.slider("Minimum Word Length", min_value=3, max_value=12, value=6)
    longest = st.sidebar.slider("Maximum Word Length", min_value=3, max_value=12, value=8)
    with_specials = st.sidebar.checkbox("Include Special Characters", value=False)
    with_digits = st.sidebar.checkbox("Include Numbers", value=True)
    return size, shortest, longest, with_specials, with_digits
46
-
47
# Wordlist generation logic (mock-up for your project)
def generate_wordlist(size, min_length, max_length, special_chars=False, numbers=True):
    """Generate `size` random lowercase words for reconnaissance testing.

    Args:
        size (int): Number of words to generate.
        min_length (int): Minimum number of letters per word (inclusive).
        max_length (int): Maximum number of letters per word (inclusive).
        special_chars (bool): Append one of ``! @ # $ %`` to each word.
        numbers (bool): Append one digit ``0``-``9`` to each word.

    Returns:
        list[str]: The generated words.
    """
    words = []
    alphabet = list("abcdefghijklmnopqrstuvwxyz")
    for _ in range(size):
        # BUGFIX: np.random.randint's upper bound is exclusive, so the original
        # code never produced words of max_length and raised ValueError when
        # min_length == max_length. Use max_length + 1 to make it inclusive.
        length = np.random.randint(min_length, max_length + 1)
        word = ''.join(np.random.choice(alphabet, size=length))
        if special_chars:
            word += np.random.choice(["!", "@", "#", "$", "%"])
        if numbers:
            word += np.random.choice([str(i) for i in range(10)])
        words.append(word)
    return words
58
-
59
# Wordlist generation and display
def generate_and_display_wordlist(wordlist_size, min_length, max_length, include_special_chars, include_numbers):
    """Generate a wordlist, show a preview table, and offer a CSV download.

    Returns:
        list[str] | None: The generated wordlist, or None if anything failed.
    """
    try:
        words = generate_wordlist(
            size=wordlist_size,
            min_length=min_length,
            max_length=max_length,
            special_chars=include_special_chars,
            numbers=include_numbers
        )

        # Preview only the first 20 entries to keep the page light.
        st.write(f"Preview of {wordlist_size} words:")
        preview = pd.DataFrame(words[:20], columns=["Generated Words"])
        st.dataframe(preview)

        # Offer the complete list as a CSV download.
        st.markdown("### Download Full Wordlist")
        csv_data = pd.Series(words).to_csv(index=False).encode()
        st.download_button(
            label="Download Wordlist as CSV",
            data=csv_data,
            file_name="reconninja_wordlist.csv",
            mime="text/csv"
        )

        return words
    except Exception as e:
        st.error(f"Error generating wordlist: {e}")
        return None
90
-
91
# Visualizing the wordlist statistics
def display_wordlist_statistics(wordlist):
    """Plot the word-length distribution and a word cloud for the wordlist."""
    if not wordlist:
        return

    st.header("Wordlist Statistics")

    # Word length distribution as a bar chart.
    lengths = pd.DataFrame([len(word) for word in wordlist], columns=["Word Length"])
    st.subheader("Word Length Distribution")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x=lengths["Word Length"], ax=ax, palette="viridis")
    ax.set_title("Frequency of Word Lengths")
    ax.set_xlabel("Word Length")
    ax.set_ylabel("Frequency")
    st.pyplot(fig)

    # Word cloud rendered from the full wordlist.
    st.subheader("Word Cloud")
    cloud = WordCloud(width=800, height=400, background_color="white").generate(" ".join(wordlist))
    st.image(cloud.to_array(), use_column_width=True)
113
-
114
# Analyze wordlist security (entropy)
def analyze_wordlist_security(wordlist):
    """Display a simulated entropy estimate and a security verdict."""
    if not wordlist:
        return

    st.header("Analyze Wordlist Security")

    multiplier = st.slider(
        "Select Entropy Multiplier",
        min_value=1.0,
        max_value=10.0,
        value=3.0,
        step=0.1
    )

    # Simulated entropy: multiplier * log2(len(wordlist)).
    entropy = np.log2(len(wordlist) ** multiplier)
    st.write(f"Estimated Entropy: {entropy:.2f} bits")

    # Feedback threshold of 50 bits separates "weak" from "strong".
    if entropy < 50:
        st.warning("Low entropy detected! This wordlist might be vulnerable to brute-force attacks.")
    else:
        st.success("Good entropy! This wordlist is secure against most brute-force attempts.")
136
-
137
# Footer section
def display_footer():
    """Render the page footer with attribution and a project link."""
    st.markdown("---")
    attribution = (
        "Made with ❤️ by Canstralian. For more information on ReconNinja, visit our [GitHub](https://github.com/Canstralian)."
    )
    st.markdown(attribution)
143
-
144
# Main application function
def main():
    """Route to the page selected in the sidebar and render it."""
    page = display_sidebar()
    display_header()

    # Keep the generated wordlist across Streamlit reruns.
    if 'wordlist' not in st.session_state:
        st.session_state.wordlist = None

    if page == "Wordlist Generator":
        params = get_user_inputs()
        # Store the result so Statistics / Security Analysis can reuse it.
        st.session_state.wordlist = generate_and_display_wordlist(*params)
    elif page in ("Statistics", "Security Analysis"):
        if st.session_state.wordlist is None:
            st.warning("Please generate a wordlist first!")
        elif page == "Statistics":
            display_wordlist_statistics(st.session_state.wordlist)
        else:
            analyze_wordlist_security(st.session_state.wordlist)

    display_footer()


if __name__ == "__main__":
    main()
 
1
  import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import json
5
+ import logging
6
+ import re
7
+
8
# Set up logging: INFO-and-above records go to app.log with a
# timestamp:level:message layout. Configured once at import time.
logging.basicConfig(
    filename="app.log",
    level=logging.INFO,
    format="%(asctime)s:%(levelname)s:%(message)s"
)
14
+
15
# Model and tokenizer loading function with caching
@st.cache_resource
def load_model():
    """
    Loads and caches the pre-trained language model and tokenizer.

    Cached by Streamlit across reruns via @st.cache_resource, so the
    (potentially slow) download/load happens only once per process.

    Returns:
        model: Pre-trained language model, or None if loading failed.
        tokenizer: Tokenizer for the model, or None if loading failed.
    """
    model_path = "Canstralian/pentest_ai"
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            # fp16 only when a CUDA device exists; fp32 fallback on CPU.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",
            load_in_4bit=False,
            # NOTE(review): load_in_8bit=True requires the bitsandbytes
            # package and a CUDA device — confirm both are present at deploy
            # time, otherwise from_pretrained raises and we return (None, None).
            load_in_8bit=True,
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        logging.info("Model and tokenizer loaded successfully.")
        return model, tokenizer
    except Exception as e:
        # Log the failure and surface a friendly UI message; callers must
        # handle the (None, None) sentinel.
        logging.error(f"Error loading model: {e}")
        st.error("Failed to load model. Please check the logs.")
        return None, None
42
+
43
def sanitize_input(text):
    """
    Sanitizes and validates user input text to prevent injection or formatting issues.

    Args:
        text (str): User input text.

    Returns:
        str: The input with every character outside letters, digits,
        whitespace, and ``. , ! ?`` removed, then stripped of surrounding
        whitespace.

    Raises:
        ValueError: If *text* is not a string.
    """
    if not isinstance(text, str):
        raise ValueError("Input must be a string.")
    # Whitelist approach: drop anything outside the allowed character set.
    cleaned = re.sub(r"[^a-zA-Z0-9\s\.,!?]", "", text)
    return cleaned.strip()
58
+
59
def generate_text(model, tokenizer, instruction):
    """
    Generates text based on the provided instruction using the loaded model.

    Args:
        model: The language model.
        tokenizer: Tokenizer for encoding/decoding.
        instruction (str): Instruction text for the model.

    Returns:
        str: Generated text response from the model, or the fixed string
        "Error in text generation." if anything failed.
    """
    try:
        # Validate and sanitize instruction input
        instruction = sanitize_input(instruction)
        # BUGFIX: the original hard-coded .to('cuda'), which crashes on
        # CPU-only hosts even though load_model() places the model with
        # device_map="auto". Send the tokens to the model's actual device.
        device = next(model.parameters()).device
        tokens = tokenizer.encode(instruction, return_tensors='pt').to(device)
        generated_tokens = model.generate(
            tokens,
            max_length=1024,
            top_p=1.0,
            temperature=0.5,
            top_k=50
        )
        # NOTE(review): top_p/temperature/top_k are ignored by generate()
        # unless do_sample=True is passed — confirm whether sampling was
        # intended here.
        generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
        logging.info("Text generated successfully.")
        return generated_text
    except Exception as e:
        logging.error(f"Error generating text: {e}")
        return "Error in text generation."
88
 
89
@st.cache_data
def load_json_data():
    """
    Loads JSON data, simulating the loading process with a sample list.

    Returns:
        list: A list of dictionaries with sample user data.
    """
    try:
        # Static fixture standing in for a real JSON source.
        records = [
            {"name": "Raja Clarke", "email": "consectetuer@yahoo.edu", "country": "Chile", "company": "Urna Nunc Consulting"},
            {"name": "Melissa Hobbs", "email": "massa.non@hotmail.couk", "country": "France", "company": "Gravida Mauris Limited"},
            {"name": "John Doe", "email": "john.doe@example.com", "country": "USA", "company": "Example Corp"},
            {"name": "Jane Smith", "email": "jane.smith@example.org", "country": "Canada", "company": "Innovative Solutions Inc"}
        ]
        logging.info("User JSON data loaded successfully.")
        return records
    except Exception as e:
        logging.error(f"Error loading JSON data: {e}")
        return []
109
+
110
# Streamlit App
st.title("Penetration Testing AI Assistant")

# Load the model and tokenizer (cached across reruns by @st.cache_resource).
model, tokenizer = load_model()

# User instruction input
instruction = st.text_input("Enter an instruction for the model:")

# Generate text button
if instruction:
    # Guard against a failed model load: load_model() returns (None, None)
    # on error, and calling generate_text with None would only surface a
    # generic "Error in text generation." message.
    if model is None or tokenizer is None:
        st.error("Model is not loaded. Please check the logs.")
    else:
        try:
            generated_text = generate_text(model, tokenizer, instruction)
            st.subheader("Generated Text:")
            st.write(generated_text)
        except ValueError as ve:
            st.error(f"Invalid input: {ve}")
        except Exception as e:
            logging.error(f"Error during text generation: {e}")
            st.error("An error occurred. Please try again.")

# Display JSON user data
st.subheader("User Data (from JSON)")
user_data = load_json_data()

for user in user_data:
    st.write(f"**Name:** {user['name']}")
    st.write(f"**Email:** {user['email']}")
    st.write(f"**Country:** {user['country']}")
    st.write(f"**Company:** {user['company']}")
    st.write("---")
141