Canstralian committed on
Commit
16bf80f
·
verified ·
1 Parent(s): eaa916b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -165
app.py CHANGED
@@ -1,176 +1,141 @@
1
  import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
- from wordcloud import WordCloud
7
- from dotenv import load_dotenv
8
- import os
9
-
10
- # Load environment variables from .env file
11
- load_dotenv()
12
- access_token = os.getenv("HUGGINGFACE_ACCESS_TOKEN")
13
-
14
- # Page configuration
15
- st.set_page_config(page_title="ReconNinja Wordlists", page_icon="💬", layout="wide")
16
-
17
# Sidebar for navigation
def display_sidebar():
    """Render the navigation radio in the sidebar and return the chosen page."""
    st.sidebar.title("Navigation")
    pages = ["Wordlist Generator", "Statistics", "Security Analysis"]
    return st.sidebar.radio("Go to", pages)
23
-
24
# Header section
def display_header():
    """Render the page title, subtitle, and introductory description."""
    st.title("💬 ReconNinja Wordlists")
    st.subheader("Tailored wordlists for efficient penetration testing")
    intro = """
    This application generates customized wordlists for use in network reconnaissance and penetration testing.
    Adjust the parameters to generate wordlists suited for your specific testing scenario.
    """
    st.markdown(intro)
32
-
33
# Sidebar for user input
def get_user_inputs():
    """Collect wordlist generation parameters from the sidebar widgets.

    Returns:
        tuple: (wordlist_size, min_length, max_length,
                include_special_chars, include_numbers)
    """
    st.sidebar.header("Customize Your Wordlist")
    st.sidebar.markdown("""
    Adjust the following parameters to create wordlists optimized for your penetration testing tasks.
    """)
    size = st.sidebar.slider("Wordlist Size", min_value=50, max_value=10000, value=1000, step=50)
    shortest = st.sidebar.slider("Minimum Word Length", min_value=3, max_value=12, value=6)
    longest = st.sidebar.slider("Maximum Word Length", min_value=3, max_value=12, value=8)
    with_specials = st.sidebar.checkbox("Include Special Characters", value=False)
    with_digits = st.sidebar.checkbox("Include Numbers", value=True)
    return size, shortest, longest, with_specials, with_digits
46
-
47
# Wordlist generation logic (mock-up for your project)
def generate_wordlist(size, min_length, max_length, special_chars=False, numbers=True):
    """Generate `size` random lowercase words for reconnaissance testing.

    Args:
        size (int): Number of words to generate.
        min_length (int): Minimum number of letters per word (inclusive).
        max_length (int): Maximum number of letters per word (inclusive).
        special_chars (bool): Append one of ``! @ # $ %`` to each word.
        numbers (bool): Append one digit ``0``-``9`` to each word.

    Returns:
        list[str]: The generated words.
    """
    words = []
    alphabet = list("abcdefghijklmnopqrstuvwxyz")
    for _ in range(size):
        # BUGFIX: np.random.randint's upper bound is exclusive, so the original
        # code never produced words of max_length and raised ValueError when
        # min_length == max_length. Use max_length + 1 to make it inclusive.
        length = np.random.randint(min_length, max_length + 1)
        word = ''.join(np.random.choice(alphabet, size=length))
        if special_chars:
            word += np.random.choice(["!", "@", "#", "$", "%"])
        if numbers:
            word += np.random.choice([str(i) for i in range(10)])
        words.append(word)
    return words
58
-
59
# Wordlist generation and display
def generate_and_display_wordlist(wordlist_size, min_length, max_length, include_special_chars, include_numbers):
    """Generate a wordlist, show a preview table, and offer a CSV download.

    Returns:
        list[str] | None: The generated wordlist, or None if anything failed.
    """
    try:
        words = generate_wordlist(
            size=wordlist_size,
            min_length=min_length,
            max_length=max_length,
            special_chars=include_special_chars,
            numbers=include_numbers
        )

        # Preview only the first 20 entries to keep the page light.
        st.write(f"Preview of {wordlist_size} words:")
        preview = pd.DataFrame(words[:20], columns=["Generated Words"])
        st.dataframe(preview)

        # Offer the complete list as a CSV download.
        st.markdown("### Download Full Wordlist")
        csv_data = pd.Series(words).to_csv(index=False).encode()
        st.download_button(
            label="Download Wordlist as CSV",
            data=csv_data,
            file_name="reconninja_wordlist.csv",
            mime="text/csv"
        )

        return words
    except Exception as e:
        st.error(f"Error generating wordlist: {e}")
        return None
90
-
91
# Visualizing the wordlist statistics
def display_wordlist_statistics(wordlist):
    """Plot the word-length distribution and a word cloud for the wordlist."""
    if not wordlist:
        return

    st.header("Wordlist Statistics")

    # Word length distribution as a bar chart.
    lengths = pd.DataFrame([len(word) for word in wordlist], columns=["Word Length"])
    st.subheader("Word Length Distribution")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x=lengths["Word Length"], ax=ax, palette="viridis")
    ax.set_title("Frequency of Word Lengths")
    ax.set_xlabel("Word Length")
    ax.set_ylabel("Frequency")
    st.pyplot(fig)

    # Word cloud rendered from the full wordlist.
    st.subheader("Word Cloud")
    cloud = WordCloud(width=800, height=400, background_color="white").generate(" ".join(wordlist))
    st.image(cloud.to_array(), use_column_width=True)
113
-
114
# Analyze wordlist security (entropy)
def analyze_wordlist_security(wordlist):
    """Display a simulated entropy estimate and a security verdict."""
    if not wordlist:
        return

    st.header("Analyze Wordlist Security")

    multiplier = st.slider(
        "Select Entropy Multiplier",
        min_value=1.0,
        max_value=10.0,
        value=3.0,
        step=0.1
    )

    # Simulated entropy: multiplier * log2(len(wordlist)).
    entropy = np.log2(len(wordlist) ** multiplier)
    st.write(f"Estimated Entropy: {entropy:.2f} bits")

    # Feedback threshold of 50 bits separates "weak" from "strong".
    if entropy < 50:
        st.warning("Low entropy detected! This wordlist might be vulnerable to brute-force attacks.")
    else:
        st.success("Good entropy! This wordlist is secure against most brute-force attempts.")
136
-
137
# Footer section
def display_footer():
    """Render the page footer with attribution and a project link."""
    st.markdown("---")
    attribution = (
        "Made with ❤️ by Canstralian. For more information on ReconNinja, visit our [GitHub](https://github.com/Canstralian)."
    )
    st.markdown(attribution)
143
-
144
# Main application function
def main():
    """Route to the page selected in the sidebar and render it."""
    page = display_sidebar()
    display_header()

    # Keep the generated wordlist across Streamlit reruns.
    if 'wordlist' not in st.session_state:
        st.session_state.wordlist = None

    if page == "Wordlist Generator":
        params = get_user_inputs()
        # Store the result so Statistics / Security Analysis can reuse it.
        st.session_state.wordlist = generate_and_display_wordlist(*params)
    elif page in ("Statistics", "Security Analysis"):
        if st.session_state.wordlist is None:
            st.warning("Please generate a wordlist first!")
        elif page == "Statistics":
            display_wordlist_statistics(st.session_state.wordlist)
        else:
            analyze_wordlist_security(st.session_state.wordlist)

    display_footer()


if __name__ == "__main__":
    main()
 
1
  import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import json
5
+ import logging
6
+ import re
7
+
8
# Set up logging: INFO-and-above records go to app.log with a
# timestamp:level:message layout. Configured once at import time.
logging.basicConfig(
    filename="app.log",
    level=logging.INFO,
    format="%(asctime)s:%(levelname)s:%(message)s"
)
14
+
15
# Model and tokenizer loading function with caching
@st.cache_resource
def load_model():
    """
    Loads and caches the pre-trained language model and tokenizer.

    Cached by Streamlit across reruns via @st.cache_resource, so the
    (potentially slow) download/load happens only once per process.

    Returns:
        model: Pre-trained language model, or None if loading failed.
        tokenizer: Tokenizer for the model, or None if loading failed.
    """
    model_path = "Canstralian/pentest_ai"
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            # fp16 only when a CUDA device exists; fp32 fallback on CPU.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",
            load_in_4bit=False,
            # NOTE(review): load_in_8bit=True requires the bitsandbytes
            # package and a CUDA device — confirm both are present at deploy
            # time, otherwise from_pretrained raises and we return (None, None).
            load_in_8bit=True,
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        logging.info("Model and tokenizer loaded successfully.")
        return model, tokenizer
    except Exception as e:
        # Log the failure and surface a friendly UI message; callers must
        # handle the (None, None) sentinel.
        logging.error(f"Error loading model: {e}")
        st.error("Failed to load model. Please check the logs.")
        return None, None
42
+
43
def sanitize_input(text):
    """
    Sanitizes and validates user input text to prevent injection or formatting issues.

    Args:
        text (str): User input text.

    Returns:
        str: The input with every character outside letters, digits,
        whitespace, and ``. , ! ?`` removed, then stripped of surrounding
        whitespace.

    Raises:
        ValueError: If *text* is not a string.
    """
    if not isinstance(text, str):
        raise ValueError("Input must be a string.")
    # Whitelist approach: drop anything outside the allowed character set.
    cleaned = re.sub(r"[^a-zA-Z0-9\s\.,!?]", "", text)
    return cleaned.strip()
58
+
59
def generate_text(model, tokenizer, instruction):
    """
    Generates text based on the provided instruction using the loaded model.

    Args:
        model: The language model.
        tokenizer: Tokenizer for encoding/decoding.
        instruction (str): Instruction text for the model.

    Returns:
        str: Generated text response from the model, or the fixed string
        "Error in text generation." if anything failed.
    """
    try:
        # Validate and sanitize instruction input
        instruction = sanitize_input(instruction)
        # BUGFIX: the original hard-coded .to('cuda'), which crashes on
        # CPU-only hosts even though load_model() places the model with
        # device_map="auto". Send the tokens to the model's actual device.
        device = next(model.parameters()).device
        tokens = tokenizer.encode(instruction, return_tensors='pt').to(device)
        generated_tokens = model.generate(
            tokens,
            max_length=1024,
            top_p=1.0,
            temperature=0.5,
            top_k=50
        )
        # NOTE(review): top_p/temperature/top_k are ignored by generate()
        # unless do_sample=True is passed — confirm whether sampling was
        # intended here.
        generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
        logging.info("Text generated successfully.")
        return generated_text
    except Exception as e:
        logging.error(f"Error generating text: {e}")
        return "Error in text generation."
88
 
89
@st.cache_data
def load_json_data():
    """
    Loads JSON data, simulating the loading process with a sample list.

    Returns:
        list: A list of dictionaries with sample user data.
    """
    try:
        # Static fixture standing in for a real JSON source.
        records = [
            {"name": "Raja Clarke", "email": "consectetuer@yahoo.edu", "country": "Chile", "company": "Urna Nunc Consulting"},
            {"name": "Melissa Hobbs", "email": "massa.non@hotmail.couk", "country": "France", "company": "Gravida Mauris Limited"},
            {"name": "John Doe", "email": "john.doe@example.com", "country": "USA", "company": "Example Corp"},
            {"name": "Jane Smith", "email": "jane.smith@example.org", "country": "Canada", "company": "Innovative Solutions Inc"}
        ]
        logging.info("User JSON data loaded successfully.")
        return records
    except Exception as e:
        logging.error(f"Error loading JSON data: {e}")
        return []
109
+
110
# Streamlit App
st.title("Penetration Testing AI Assistant")

# Load the model and tokenizer (cached across reruns by @st.cache_resource).
model, tokenizer = load_model()

# User instruction input
instruction = st.text_input("Enter an instruction for the model:")

# Generate text button
if instruction:
    # Guard against a failed model load: load_model() returns (None, None)
    # on error, and calling generate_text with None would only surface a
    # generic "Error in text generation." message.
    if model is None or tokenizer is None:
        st.error("Model is not loaded. Please check the logs.")
    else:
        try:
            generated_text = generate_text(model, tokenizer, instruction)
            st.subheader("Generated Text:")
            st.write(generated_text)
        except ValueError as ve:
            st.error(f"Invalid input: {ve}")
        except Exception as e:
            logging.error(f"Error during text generation: {e}")
            st.error("An error occurred. Please try again.")

# Display JSON user data
st.subheader("User Data (from JSON)")
user_data = load_json_data()

for user in user_data:
    st.write(f"**Name:** {user['name']}")
    st.write(f"**Email:** {user['email']}")
    st.write(f"**Country:** {user['country']}")
    st.write(f"**Company:** {user['company']}")
    st.write("---")
141