import streamlit as st
import requests
import csv
from io import StringIO
import os
import random
import string
# Function to generate random file content
def generate_random_content(size=100):
    return ''.join(random.choices(string.ascii_letters + string.digits + string.punctuation, k=size))
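# Example (output varies per call): generate_random_content(8) could return something like 'aB3$x,Q!'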
# Create a new folder
folder_name = "random_files_folder"
os.makedirs(folder_name, exist_ok=True)
print(f"Folder '{folder_name}' created.")

# Generate random files
num_files = 5  # Number of random files to create
for i in range(num_files):
    # Random file name
    file_name = ''.join(random.choices(string.ascii_lowercase, k=8)) + ".txt"
    file_path = os.path.join(folder_name, file_name)

    # Random content for each file (length between 50 and 200 characters)
    content = generate_random_content(size=random.randint(50, 200))

    # Write the content to the file
    with open(file_path, 'w') as file:
        file.write(content)
    print(f"Created file: {file_path}")

print("All random files generated successfully.")
# Required NetFlow schema
required_columns = [
    'Flow duration', 'Source port', 'Destination port',
    'Total forward packets', 'Total backward packets',
    'Avg forward segment size', 'Avg backward segment size'
]
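# Example of a valid upload: a CSV whose header row contains (at least) these
# columns, in any order -- the schema check below allows extra columns:
#   Flow duration,Source port,Destination port,Total forward packets,Total backward packets,Avg forward segment size,Avg backward segment size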
# Hugging Face API Settings
HUGGING_FACE_API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/all-distilroberta-v1"
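# The hosted Inference API takes a POST with a JSON body like {"inputs": [...]}
# and an "Authorization: Bearer <token>" header; the response shape depends on
# the task the model is served under (see the NOTE where the response is parsed).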
# Streamlit UI
st.title("NetFlow Log Comparison Tool")
st.write("Compare your NetFlow logs against Sigma rules or MITRE ATT&CK patterns using Retrieval-Augmented Generation (RAG).")
# Display the embedding model being used
st.write("### Embedding Model in Use")
st.write("The model used for embedding is: **All-DistilRoBERTa-V1**")
# Instructions for data upload
st.markdown("""
**Instructions:**
- Upload a CSV file with your NetFlow log data.
- Ensure that the file contains **all the required columns** listed below.
- You can upload **up to 5 rows** for analysis.
""")
# Display required schema for users with bullet points
st.write("### Required NetFlow Schema:")
st.markdown("""
- **Flow duration**
- **Source port**
- **Destination port**
- **Total forward packets**
- **Total backward packets**
- **Avg forward segment size**
- **Avg backward segment size**
""")
# Step 1: File Upload
uploaded_file = st.file_uploader("Upload your NetFlow log sequence CSV file", type="csv")
# Step 2: User Token Input
hugging_face_api_token = st.text_input("Enter your Hugging Face API Token", type="password")
if not hugging_face_api_token:
    st.warning("Please provide a Hugging Face API Token to proceed.")
# Step 3: Model and Comparison Options
st.write("### Model and Comparison Options")
llm_choice = st.selectbox("Select LLM", ["All-DistilRoBERTa-V1"]) # Add other models as necessary
comparison_choice = st.selectbox("Select Comparison Type", ["Mitre", "Sigma"])
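# NOTE: llm_choice and comparison_choice are collected for the user's selection
# but are not yet wired into the API call below.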
# Step 4: Run Comparison if File Uploaded and Token Provided
if uploaded_file and hugging_face_api_token:
    # Read the uploaded file using the csv module
    csv_file = StringIO(uploaded_file.getvalue().decode("utf-8"))
    reader = csv.DictReader(csv_file)
    csv_data = list(reader)

    # Display the first few rows to the user
    st.write("Uploaded File:")
    for i, row in enumerate(csv_data[:5]):
        st.write(row)

    # Check that the file has the required schema (reader.fieldnames is None
    # for an empty file, so guard against that first)
    if reader.fieldnames and all(col in reader.fieldnames for col in required_columns):
        if len(csv_data) <= 5:
            st.success("File contains all required columns and meets the row limit of 5.")

            # Prepare data for the Hugging Face API call: render each row dict
            # as a string so it can be embedded and compared
            input_texts = [f"{row}" for row in csv_data]

            # Call the Hugging Face API
            headers = {"Authorization": f"Bearer {hugging_face_api_token}"}
            try:
                # Perform inference using the Hugging Face API
                response = requests.post(HUGGING_FACE_API_URL, headers=headers, json={"inputs": input_texts})
                response.raise_for_status()

                # Display the results
                st.write("### Comparison Results")
                comparison_results = response.json()

                # NOTE: the sorting below assumes the endpoint returns a list of
                # dicts with 'sequence' and 'score' keys (i.e. pre-scored
                # matches); the stock feature-extraction task returns raw
                # embedding vectors instead, in which case scoring must be done
                # locally.
                top_results = sorted(comparison_results, key=lambda x: x['score'], reverse=True)[:3]

                # Display the top 3 results
                for idx, result in enumerate(top_results):
                    st.write(f"**{idx + 1}.** Matched Sequence: `{result['sequence']}`")
                    st.write(f"   - **Cosine Similarity Score**: {result['score']:.4f}")
            except requests.exceptions.RequestException as e:
                st.error(f"Error calling Hugging Face API: {str(e)}")
        else:
            st.error(f"File exceeds the row limit of 5. Your file contains {len(csv_data)} rows.")
    else:
        missing_columns = [col for col in required_columns if col not in (reader.fieldnames or [])]
        st.error(f"Missing columns: {', '.join(missing_columns)}")
# Step 5: Survey Link
st.write("### Feedback Survey")
st.write("We value your feedback. [Fill out our survey](https://docs.google.com/forms/d/1-P_7Uv5OphSWhTyoPuO0jjUQnYg_Hv5oVGBkhbg-H8g/prefill)") # Replace with your survey link
# Footer
st.markdown("---")
st.write("This free site is maintained by DeepTempo.")
# st.image("Final_DeepTempo_logo.png", width=300)  # Adjust the path and width as needed
st.write("[Visit DeepTempo.ai](https://deeptempo.ai)")
st.write("[Check out the underlying code on GitHub](https://github.com/deepsecoss)")
# CSS to change link color to white
st.markdown(
    """
    <style>
    a {
        color: white !important;
        text-decoration: underline; /* Optional: to keep the link recognizable */
    }
    </style>
    """,
    unsafe_allow_html=True
)