File size: 3,236 Bytes
7d81008
 
 
 
 
29f762a
7d81008
 
 
 
 
29f762a
7d81008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29f762a
7d81008
29f762a
 
7d81008
29f762a
6108c6b
 
 
29f762a
 
 
6108c6b
29f762a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import io
import string

import numpy as np
import pandas as pd
import streamlit as st
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Initialize transformers.
# The checkpoint is loaded exactly once here and the resulting objects are
# handed to the pipeline; giving pipeline() the checkpoint *name* as well
# would make it load a second, independent copy of the same weights.
model_name = "deepset/roberta-base-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
# NOTE(review): Streamlit re-executes the script on user interaction, so this
# load is presumably repeated on each rerun -- consider wrapping it in a
# cached loader (e.g. st.cache_resource) once the Streamlit version is confirmed.

# Vocabulary of single, lower-case words treated as indicators of a crime.
# The detection loop compares these against the whitespace-split tokens of the
# model's answer, so every entry must be one word: a multi-word entry such as
# "credit card" could never match and is therefore listed as "credit", "card".
# Each word appears once; order is kept from the original list.
suspicious_words = [
    "robbery", "crime", "exchange", "extortion", "threat", "suspicious", "fraud", "laundering",
    "illegal", "contraband", "smuggling", "burglary", "assault", "hijacking", "kidnapping", "ransom",
    "hostage", "terrorism", "homicide", "murder", "manslaughter", "weapon", "gun", "explosive", "bomb", "knives",
    "threaten", "blackmail", "intimidate", "menace", "harassment", "stalking", "kidnap", "abduction", "guns", "bombs",
    "abuse", "trafficking", "prostitution", "pimping", "drug", "narcotic", "cocaine", "heroin", "methamphetamine",
    "amphetamine", "opiate", "meth", "gang", "gangster", "mafia", "racket", "extort", "embezzle", "corruption",
    "bribe", "scam", "forgery", "counterfeit", "fraudulent", "cybercrime", "hacker", "phishing", "identity", "theft",
    "credit", "card", "ponzi", "scheme", "pyramid", "money", "swindle", "deception",
    "conspiracy", "plot", "coercion", "corrupt", "criminal", "felony", "misdemeanor", "felon", "fugitive",
    "wanted", "arson", "arsonist", "arsony", "stolen", "steal", "loot", "heist", "launder", "hitman", "racketeer",
    "hijack", "smuggle", "terrorist", "kidnapper", "perpetrator", "ringleader", "prowler", "vigilante", "sabotage",
    "saboteur", "suicide", "discreet", "hide", "action", "profile", "alert", "vigilant", "clandestine", "riot", "arms", "deal",
]

# Prompts put to the question-answering model for every sentence.
# Order matters: the answers are read back by position (event, place, time).
questions = [
    "What event is going to take place?",
    "Where is it going to happen",
    "What time is it going to happen?",
]

# Page header shown at the top of the Streamlit app
st.title("Crime Detection App")

# Read the input workbook and pull its 'sentences' column out as a plain list
df = pd.read_excel('senti.xlsx')
parsed_column = df['sentences'].tolist()

# Process sentences and store results.
# Exactly one output row is appended per input row, so the result table keeps
# the same order and length as the list of input sentences.
output_data = {'Crime Detected': [], 'Location Detected': [], 'Time Detected': []}

# Build the lookup set once instead of once per sentence.
suspicious_vocabulary = set(suspicious_words)

for sentence in parsed_column:
    # Start from the "nothing found" row and overwrite it only on a hit.
    crime = 'No crime detected'
    location = 'No location detected'
    time_detected = 'No time detected'

    # Blank spreadsheet cells come back from pandas as NaN (a float), and the
    # QA pipeline raises on an empty context, so only real text is sent to
    # the model; anything else keeps the "nothing found" row.
    if isinstance(sentence, str) and sentence.strip():
        # Keyword form: one context, several questions -> one answer dict per
        # question, in the same order as `questions`.
        answers = nlp(question=questions, context=sentence)
        event = answers[0]['answer']

        # Strip surrounding punctuation so that e.g. "robbery," still matches.
        event_tokens = {token.strip(string.punctuation) for token in event.lower().split()}

        if event_tokens & suspicious_vocabulary:
            crime = event
            location = answers[1]['answer'] or location
            time_detected = answers[2]['answer'] or time_detected

    output_data['Crime Detected'].append(crime)
    output_data['Location Detected'].append(location)
    output_data['Time Detected'].append(time_detected)

# Convert data to DataFrame
output_df = pd.DataFrame(output_data)

# Display results
st.write(output_df)

# Download button for Excel file.
# DataFrame.to_excel() requires a target to write to and returns None, so the
# workbook is rendered into an in-memory buffer and the buffer's bytes are
# what Streamlit offers for download.
excel_buffer = io.BytesIO()
output_df.to_excel(excel_buffer)

st.download_button(
    label="Download Excel",
    data=excel_buffer.getvalue(),
    file_name='crime_data_output.xlsx',
    mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
)