import streamlit as st
import requests
import csv
from io import StringIO
import os
import random
import string
# Function to generate random file content
def generate_random_content(size=100):
    return ''.join(random.choices(string.ascii_letters + string.digits + string.punctuation, k=size))
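# Example (output varies per call): generate_random_content(8) could return something like 'aB3$x,Q!'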
# Create a new folder
folder_name = "random_files_folder"
os.makedirs(folder_name, exist_ok=True)
print(f"Folder '{folder_name}' created.")

# Generate random files
num_files = 5  # Number of random files to create
for i in range(num_files):
    # Random file name
    file_name = ''.join(random.choices(string.ascii_lowercase, k=8)) + ".txt"
    file_path = os.path.join(folder_name, file_name)

    # Random content for each file (length between 50 and 200 characters)
    content = generate_random_content(size=random.randint(50, 200))

    # Write the content to the file
    with open(file_path, 'w') as file:
        file.write(content)
    print(f"Created file: {file_path}")

print("All random files generated successfully.")
# Required NetFlow schema
required_columns = [
    'Flow duration', 'Source port', 'Destination port',
    'Total forward packets', 'Total backward packets',
    'Avg forward segment size', 'Avg backward segment size'
]
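# Example of a valid upload: a CSV whose header row contains (at least) these
# columns, in any order -- the schema check below allows extra columns:
#   Flow duration,Source port,Destination port,Total forward packets,Total backward packets,Avg forward segment size,Avg backward segment size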
# Hugging Face API Settings
HUGGING_FACE_API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/all-distilroberta-v1"
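# The hosted Inference API takes a POST with a JSON body like {"inputs": [...]}
# and an "Authorization: Bearer <token>" header; the response shape depends on
# the task the model is served under (see the NOTE where the response is parsed).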
# Streamlit UI
st.title("NetFlow Log Comparison Tool")
st.write("Compare your NetFlow logs against Sigma rules or MITRE ATT&CK patterns using Retrieval-Augmented Generation (RAG).")
# Display the embedding model being used
st.write("### Embedding Model in Use")
st.write("The model used for embedding is: **All-DistilRoBERTa-V1**")
# Instructions for data upload
st.markdown("""
**Instructions:**
- Upload a CSV file with your NetFlow log data.
- Ensure that the file contains **all the required columns** listed below.
- You can upload **up to 5 rows** for analysis.
""")
# Display required schema for users with bullet points
st.write("### Required NetFlow Schema:")
st.markdown("""
- **Flow duration**
- **Source port**
- **Destination port**
- **Total forward packets**
- **Total backward packets**
- **Avg forward segment size**
- **Avg backward segment size**
""")
# Step 1: File Upload
uploaded_file = st.file_uploader("Upload your NetFlow log sequence CSV file", type="csv")
# Step 2: User Token Input
hugging_face_api_token = st.text_input("Enter your Hugging Face API Token", type="password")
if not hugging_face_api_token:
    st.warning("Please provide a Hugging Face API Token to proceed.")
# Step 3: Model and Comparison Options
st.write("### Model and Comparison Options")
llm_choice = st.selectbox("Select LLM", ["All-DistilRoBERTa-V1"]) # Add other models as necessary
comparison_choice = st.selectbox("Select Comparison Type", ["Mitre", "Sigma"])
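# NOTE: llm_choice and comparison_choice are collected for the user's selection
# but are not yet wired into the API call below.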
# Step 4: Run Comparison if File Uploaded and Token Provided
if uploaded_file and hugging_face_api_token:
    # Read the uploaded file using the csv module
    csv_file = StringIO(uploaded_file.getvalue().decode("utf-8"))
    reader = csv.DictReader(csv_file)
    csv_data = list(reader)

    # Display the first few rows to the user
    st.write("Uploaded File:")
    for i, row in enumerate(csv_data[:5]):
        st.write(row)

    # Check that the file has the required schema (reader.fieldnames is None
    # for an empty file, so guard against that first)
    if reader.fieldnames and all(col in reader.fieldnames for col in required_columns):
        if len(csv_data) <= 5:
            st.success("File contains all required columns and meets the row limit of 5.")

            # Prepare data for the Hugging Face API call: render each row dict
            # as a string so it can be embedded and compared
            input_texts = [f"{row}" for row in csv_data]

            # Call the Hugging Face API
            headers = {"Authorization": f"Bearer {hugging_face_api_token}"}
            try:
                # Perform inference using the Hugging Face API
                response = requests.post(HUGGING_FACE_API_URL, headers=headers, json={"inputs": input_texts})
                response.raise_for_status()

                # Display the results
                st.write("### Comparison Results")
                comparison_results = response.json()

                # NOTE: the sorting below assumes the endpoint returns a list of
                # dicts with 'sequence' and 'score' keys (i.e. pre-scored
                # matches); the stock feature-extraction task returns raw
                # embedding vectors instead, in which case scoring must be done
                # locally.
                top_results = sorted(comparison_results, key=lambda x: x['score'], reverse=True)[:3]

                # Display the top 3 results
                for idx, result in enumerate(top_results):
                    st.write(f"**{idx + 1}.** Matched Sequence: `{result['sequence']}`")
                    st.write(f"   - **Cosine Similarity Score**: {result['score']:.4f}")
            except requests.exceptions.RequestException as e:
                st.error(f"Error calling Hugging Face API: {str(e)}")
        else:
            st.error(f"File exceeds the row limit of 5. Your file contains {len(csv_data)} rows.")
    else:
        missing_columns = [col for col in required_columns if col not in (reader.fieldnames or [])]
        st.error(f"Missing columns: {', '.join(missing_columns)}")
# Step 5: Survey Link
st.write("### Feedback Survey")
st.write("We value your feedback. [Fill out our survey](https://docs.google.com/forms/d/1-P_7Uv5OphSWhTyoPuO0jjUQnYg_Hv5oVGBkhbg-H8g/prefill)") # Replace with your survey link
# Footer
st.markdown("---")
st.write("This free site is maintained by DeepTempo.")
# st.image("Final_DeepTempo_logo.png", width=300)  # Adjust the path and width as needed
st.write("[Visit DeepTempo.ai](https://deeptempo.ai)")
st.write("[Check out the underlying code on GitHub](https://github.com/deepsecoss)")
# CSS to change link color to white
st.markdown(
    """
    <style>
    a {
        color: white !important;
        text-decoration: underline; /* Optional: to keep the link recognizable */
    }
    </style>
    """,
    unsafe_allow_html=True
)