# J.A.R.V.I.S / your_script.py
# Uploaded by varun324242 using huggingface_hub (commit fe2a0f2, verified).
import logging
import subprocess
import sys
import os
import requests
from PIL import Image
import pytesseract
from io import BytesIO
import pandas as pd
import json
from groq import Groq
from twilio.rest import Client
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from google.colab import drive, auth
# Logging setup: DEBUG-and-above records go to 'sms_debug.log' and to the
# console stream handler, each prefixed with a timestamp and the level name.
logging.basicConfig(
    handlers=[logging.FileHandler('sms_debug.log'), logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
# Module-level logger shared by every function and class in this file.
logger = logging.getLogger(__name__)
def install_dependencies():
    """Install the system and Python packages this script relies on.

    Runs ``apt-get update``, installs ``chromium-chromedriver`` and
    ``tesseract-ocr`` through ``apt-get``, then pip-installs each Python
    package with the interpreter that is running this script.

    Raises:
        subprocess.CalledProcessError: If any command exits with a non-zero
            status (every command is run with ``check=True``).
    """
    system_commands = (
        ['apt-get', 'update'],
        ['apt-get', 'install', '-y', 'chromium-chromedriver'],
        ['apt-get', 'install', '-y', 'tesseract-ocr'],
    )
    python_packages = (
        'selenium',
        'Pillow',
        'pytesseract',
        'pandas',
        'requests',
        'groq',
        'twilio',
    )
    pip_commands = [
        [sys.executable, '-m', 'pip', 'install', name]
        for name in python_packages
    ]
    # System packages first, then one pip invocation per Python package.
    for command in (*system_commands, *pip_commands):
        subprocess.run(command, check=True)
class SMSSender:
    """Small wrapper around the Twilio REST client for sending SMS messages.

    Credentials are not stored in source. They are taken from the constructor
    arguments or, when those are omitted, from the ``TWILIO_ACCOUNT_SID``,
    ``TWILIO_AUTH_TOKEN`` and ``TWILIO_FROM_NUMBER`` environment variables.

    NOTE: credentials that were previously committed in this file should be
    treated as compromised and rotated.

    Attributes:
        account_sid: Twilio account SID, or ``None`` when not configured.
        auth_token: Twilio auth token, or ``None`` when not configured.
        from_number: Sender phone number passed to Twilio as ``from_``.
        default_country_code: Prefix added to recipients that do not start
            with ``+``.
        client: Twilio ``Client`` instance, or ``None`` when SMS is disabled.
    """

    def __init__(self, account_sid=None, auth_token=None, from_number=None,
                 default_country_code="+91"):
        """Create the Twilio client, disabling SMS instead of raising on failure.

        Args:
            account_sid: Twilio account SID; defaults to ``TWILIO_ACCOUNT_SID``.
            auth_token: Twilio auth token; defaults to ``TWILIO_AUTH_TOKEN``.
            from_number: Sender number; defaults to ``TWILIO_FROM_NUMBER`` and
                then to the number this script has always sent from.
            default_country_code: Prefix for recipients given without ``+``.
        """
        self.account_sid = account_sid or os.environ.get("TWILIO_ACCOUNT_SID")
        self.auth_token = auth_token or os.environ.get("TWILIO_AUTH_TOKEN")
        self.from_number = from_number or os.environ.get(
            "TWILIO_FROM_NUMBER", "+17322534518"
        )
        self.default_country_code = default_country_code
        self.client = None

        if not (self.account_sid and self.auth_token):
            logger.error(
                "Twilio credentials missing: set TWILIO_ACCOUNT_SID and "
                "TWILIO_AUTH_TOKEN. SMS sending is disabled."
            )
            return

        try:
            self.client = Client(self.account_sid, self.auth_token)
            logger.info("Twilio client initialized successfully")
        except Exception as e:
            # Best effort: a broken SMS setup must not stop the scan itself,
            # so the sender is left disabled rather than raising.
            logger.error(f"Failed to initialize Twilio client: {str(e)}")
            self.client = None

    def send_sms(self, to_number, message):
        """Send ``message`` to ``to_number`` through Twilio.

        Args:
            to_number: Recipient number. If it does not start with ``+`` the
                ``default_country_code`` is prepended.
            message: Text body of the SMS.

        Returns:
            ``True`` if Twilio accepted the message, ``False`` if the client
            is not initialized, the recipient is empty, or the API call failed.
        """
        if not self.client:
            logger.error("Twilio client not initialized. SMS will not be sent.")
            return False
        try:
            recipient = str(to_number).strip() if to_number is not None else ""
            if not recipient:
                logger.error("No recipient number given. SMS will not be sent.")
                return False
            logger.info(f"Attempting to send SMS to: {recipient}")
            if not recipient.startswith('+'):
                recipient = f"{self.default_country_code}{recipient}"
            # Kept in its own name so the `message` argument is not shadowed.
            sent = self.client.messages.create(
                body=message,
                from_=self.from_number,
                to=recipient,
            )
            logger.info(f"SMS sent successfully! Message SID: {sent.sid}")
            return True
        except Exception as e:
            logger.error(f"Failed to send SMS: {str(e)}", exc_info=True)
            return False
class ScamDetector:
    """Scrape SMS screenshots, OCR them, classify the text and persist results.

    Pipeline: :meth:`scrape_images` collects image URLs from a Bing image
    search, :meth:`download_and_extract_text` OCRs each image and cleans the
    text through Groq, and :meth:`process_and_save` sends the text to the
    prediction endpoint and writes per-run files plus a cumulative status
    report under ``base_path``.

    Attributes:
        groq_client: ``Groq`` API client built from ``groq_api_key``.
        sms_sender: Object exposing ``send_sms(to_number, message)``, or
            ``None`` to disable SMS notifications.
        alert_number: Phone number that receives status and failure alerts.
    """

    # Endpoint that labels a message; its JSON response is read for the
    # "predicted_result" key.
    PREDICTION_URL = "https://varun324242-sssssss.hf.space/predict"

    def __init__(self, groq_api_key, sms_sender, alert_number="8140030507"):
        """Build the Groq client, remember the SMS sender and mount Drive.

        Args:
            groq_api_key: API key passed to ``Groq``.
            sms_sender: SMS sender used for notifications; may be ``None``.
            alert_number: Recipient of notifications sent by this class.
        """
        self.groq_client = Groq(api_key=groq_api_key)
        self.sms_sender = sms_sender
        self.alert_number = alert_number
        self.setup_drive()

    def _notify(self, message):
        """Send ``message`` to the alert number; return False if SMS is off."""
        if not self.sms_sender:
            logger.warning("SMS sender unavailable; notification skipped.")
            return False
        return self.sms_sender.send_sms(
            to_number=self.alert_number,
            message=message,
        )

    def setup_drive(self):
        """Authenticate the Colab user and mount Drive at ``/content/drive``."""
        auth.authenticate_user()
        drive.mount('/content/drive')

    def process_text_with_groq(self, text):
        """Ask Groq to tidy OCR output and return the cleaned text.

        Args:
            text: Raw text extracted from an SMS image.

        Returns:
            The stripped content of the first completion choice.

        Raises:
            SystemExit: On any API error or an empty/invalid response. The
                failure is logged and reported by SMS (when a sender is
                available) before the process exits with status 1.
        """
        prompt = (
            "\n"
            "Format the following extracted text from an SMS image.\n"
            "Keep the original content intact but improve the formatting and remove any OCR artifacts:\n"
            f"{text}\n"
        )
        try:
            completion = self.groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.8,
                max_tokens=1024,
                top_p=1,
                stream=False,
                stop=None,
            )
            if completion and getattr(completion, 'choices', None):
                return completion.choices[0].message.content.strip()
            error_msg = "Invalid response from Groq API"
        except Exception as e:
            error_msg = f"Critical error in Groq processing: {str(e)}"

        # Both failure paths end identically: log, alert, stop the process.
        logger.error(error_msg)
        self._notify(f"CRITICAL: Groq API Error - {error_msg}. Stopping process.")
        sys.exit(1)

    def download_and_extract_text(self, url):
        """Download an image, OCR it and return the Groq-cleaned text.

        Args:
            url: Image URL to fetch.

        Returns:
            The cleaned text, or ``None`` if the download/OCR failed or the
            image contained no text.
        """
        try:
            response = requests.get(url, timeout=10)
            # Do not feed an HTTP error page to the image decoder.
            response.raise_for_status()
            with Image.open(BytesIO(response.content)) as img:
                text = pytesseract.image_to_string(img).strip()
            if text:
                return self.process_text_with_groq(text)
        except Exception as e:
            logger.error(f"Error processing image from {url}: {str(e)}")
        return None

    def scrape_images(self, search_query="indian scam sms", scrolls=5):
        """Collect unique image URLs from a Bing image search.

        Args:
            search_query: Text to search for.
            scrolls: Number of scroll-to-bottom passes made to load more
                results before the page is read.

        Returns:
            List of unique ``http``/``https`` image URLs in discovery order.
        """
        from urllib.parse import quote_plus

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=chrome_options)
        try:
            # quote_plus yields the same '+'-joined query for plain words and
            # also escapes characters that are unsafe in a URL.
            encoded_query = quote_plus(search_query)
            driver.get(f"https://www.bing.com/images/search?q={encoded_query}")
            logger.info("Loading images...")
            time.sleep(3)
            for i in range(scrolls):
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)
                logger.info(f"Scroll {i+1}/{scrolls} completed")

            image_urls = []
            seen = set()  # O(1) duplicate check; the list keeps the order
            # ".mimg" elements expose the URL in 'src'; ".iusc" elements carry
            # a JSON blob in their 'm' attribute whose 'murl' key is used.
            for selector in (".mimg", ".iusc"):
                for element in driver.find_elements(By.CSS_SELECTOR, selector):
                    try:
                        if selector == ".mimg":
                            url = element.get_attribute('src')
                        else:
                            m = element.get_attribute('m')
                            if not m:
                                continue
                            url = json.loads(m).get('murl')
                        if url and url.startswith('http') and url not in seen:
                            seen.add(url)
                            image_urls.append(url)
                    except Exception as e:
                        logger.error(f"Error getting URL from {selector}: {str(e)}")
            return image_urls
        finally:
            driver.quit()

    @staticmethod
    def _sum_report_field(lines, label):
        """Sum the integer values of report lines that START with ``label``.

        Matching is anchored at the start of the line so that per-run lines
        such as ``Scams Detected: 3`` are counted while the summary lines
        ``Total Scams Detected: 3`` are not.
        """
        total = 0
        for line in lines:
            if not line.startswith(label):
                continue
            try:
                total += int(line.split(': ', 1)[1])
            except (IndexError, ValueError):
                logger.warning(f"Skipping malformed report line: {line.strip()}")
        return total

    def update_status_report(self, timestamp, total_images, processed_images, scam_count, ham_count, folder_path, base_path='/content/drive/MyDrive'):
        """Append this run and refreshed overall totals to the status report.

        The report lives at ``{base_path}/scam_detector_status_report.txt``.
        Totals are derived from the per-run entries already in the file plus
        the current run.

        Args:
            timestamp: Label for this run.
            total_images: Number of image URLs found in this run.
            processed_images: Number of images successfully processed.
            scam_count: Messages predicted as scam in this run.
            ham_count: Messages predicted as ham in this run.
            folder_path: Where this run's result files were written.
            base_path: Directory that holds the report file.

        Returns:
            ``(total_runs, total_processed, total_scams, total_ham)``, or
            ``None`` if the report could not be read or written.
        """
        report_path = f"{base_path}/scam_detector_status_report.txt"
        try:
            existing_runs = []
            if os.path.exists(report_path):
                with open(report_path, 'r', encoding='utf-8') as f:
                    existing_runs = f.readlines()

            new_run = (
                f"\n=== Scan Run: {timestamp} ===\n"
                f"Total Images Found: {total_images}\n"
                f"Successfully Processed: {processed_images}\n"
                f"Scams Detected: {scam_count}\n"
                f"Legitimate Messages: {ham_count}\n"
                f"Results Location: {folder_path}\n"
                f"{'=' * 50}\n"
            )

            # Only per-run lines are counted. Substring matching would also
            # hit the earlier "Total ..." summary lines and double-count.
            total_runs = sum(
                1 for line in existing_runs if line.startswith("=== Scan Run:")
            ) + 1
            total_processed = self._sum_report_field(
                existing_runs, "Successfully Processed:"
            ) + processed_images
            total_scams = self._sum_report_field(
                existing_runs, "Scams Detected:"
            ) + scam_count
            total_ham = self._sum_report_field(
                existing_runs, "Legitimate Messages:"
            ) + ham_count

            summary = (
                f"\n=== OVERALL STATISTICS ===\n"
                f"Total Runs: {total_runs}\n"
                f"Total Images Processed: {total_processed}\n"
                f"Total Scams Detected: {total_scams}\n"
                f"Total Legitimate Messages: {total_ham}\n"
                f"Last Updated: {timestamp}\n"
                f"{'=' * 50}\n"
            )

            # Run entry and summary are appended together in one write.
            with open(report_path, 'a', encoding='utf-8') as f:
                f.write(new_run + summary)

            logger.info(f"Status report updated at: {report_path}")
            return total_runs, total_processed, total_scams, total_ham
        except Exception as e:
            logger.error(f"Error updating status report: {str(e)}")
            return None

    def process_and_save(self, image_urls, base_path='/content/drive/MyDrive'):
        """OCR and classify every image, then write results and notify.

        Args:
            image_urls: Image URLs to process.
            base_path: Directory for ``scam123.csv``, the per-run result
                folder and the status report.

        Returns:
            ``(url_path, text_path, csv_path)`` of the files written for this
            run.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        logger.info("Starting to process images one by one...")
        image_texts = []
        scam_file_path = f"{base_path}/scam123.csv"

        # Create scam123.csv with its header on first use, then load it.
        if not os.path.exists(scam_file_path):
            pd.DataFrame(columns=['v1', 'v2']).to_csv(scam_file_path, index=False)
        try:
            existing_df = pd.read_csv(scam_file_path)
        except Exception as e:
            logger.error(f"Error reading existing CSV: {str(e)}")
            existing_df = pd.DataFrame(columns=['v1', 'v2'])

        total_images = len(image_urls)
        for i, url in enumerate(image_urls, 1):
            try:
                logger.info(f"\n--- Processing image {i}/{total_images} ---")
                logger.info(f"URL: {url}")

                # Step 1: download and extract text
                logger.info("Extracting text from image...")
                text = self.download_and_extract_text(url)
                if not text:
                    logger.warning(f"No text extracted from image {i}, skipping...")
                    continue

                # Step 2: send to the prediction API
                logger.info("Sending text to prediction API...")
                try:
                    response = requests.post(
                        self.PREDICTION_URL,
                        json={"message": text},
                        timeout=30,  # a stalled endpoint must not hang the scan
                    )
                    response.raise_for_status()
                    prediction = response.json().get("predicted_result", "unknown")
                except requests.exceptions.RequestException as e:
                    logger.error(f"API error for image {i}: {str(e)}")
                    continue

                image_texts.append({
                    'URL': url,
                    'Text': text,
                    'Prediction': prediction
                })

                # NOTE(review): messages predicted "ham" are stored with the
                # label 'scam'. Presumably these are treated as missed scams
                # because the images come from a scam search - confirm intent.
                if prediction == "ham":
                    new_row = pd.DataFrame([{
                        'v1': 'scam',
                        'v2': text
                    }])
                    existing_df = pd.concat([existing_df, new_row], ignore_index=True)
                    existing_df.to_csv(scam_file_path, index=False)
                    logger.info("Added ham message to scam123.csv")

                logger.info(f"Successfully processed image {i}/{total_images}")
                logger.info(f"Prediction: {prediction}")
            except Exception as e:
                logger.error(f"Error processing image {i}: {str(e)}")
                continue

        # Save this run's results
        folder_path = f"{base_path}/scam_detector_{timestamp}"
        os.makedirs(folder_path, exist_ok=True)
        url_path = f'{folder_path}/scam_image_urls.txt'
        text_path = f'{folder_path}/scam_image_texts.txt'
        csv_path = f'{folder_path}/scam_messages.csv'

        with open(url_path, 'w', encoding='utf-8') as f:
            f.writelines(url + '\n' for url in image_urls)

        with open(text_path, 'w', encoding='utf-8') as f:
            for item in image_texts:
                f.write(f"URL: {item['URL']}\n")
                f.write(f"Text: {item['Text']}\n")
                f.write(f"Prediction: {item['Prediction']}\n")
                f.write("-" * 80 + "\n")

        pd.DataFrame(image_texts).to_csv(csv_path, index=False)

        ham_count = sum(1 for item in image_texts if item['Prediction'] == 'ham')
        scam_count = sum(1 for item in image_texts if item['Prediction'] == 'scam')

        report_totals = self.update_status_report(
            timestamp=timestamp,
            total_images=len(image_urls),
            processed_images=len(image_texts),
            scam_count=scam_count,
            ham_count=ham_count,
            folder_path=folder_path,
            base_path=base_path
        )
        if report_totals is None:
            # The report update failed (already logged); the run's own
            # results are still valid, so report the totals as unknown.
            total_runs = total_processed = total_scams = total_ham = "unavailable"
        else:
            total_runs, total_processed, total_scams, total_ham = report_totals

        final_message = (
            f"Scan Complete!\n"
            f"This Run:\n"
            f"- Images Found: {len(image_urls)}\n"
            f"- Processed: {len(image_texts)}\n"
            f"- Scams: {scam_count}\n"
            f"- Legitimate: {ham_count}\n"
            f"\nOverall Statistics:\n"
            f"- Total Runs: {total_runs}\n"
            f"- Total Processed: {total_processed}\n"
            f"- Total Scams: {total_scams}\n"
            f"- Total Ham: {total_ham}\n"
            f"\nResults saved to: {folder_path}"
        )
        self._notify(final_message)
        return url_path, text_path, csv_path
def main():
    """Run the scam-detection scan in an endless loop.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable
    rather than being stored in source. Each scan scrapes image URLs,
    processes them and then waits 30 seconds; a failed scan is logged and
    retried after 5 minutes. A failure during setup is reported by SMS (when
    a sender is available) and the whole setup is retried after 1 minute.

    Raises:
        SystemExit: With status 1 if ``GROQ_API_KEY`` is not set.
    """
    groq_api_key = os.environ.get("GROQ_API_KEY")
    if not groq_api_key:
        # A missing key cannot be fixed by retrying, so fail fast.
        logger.error("GROQ_API_KEY environment variable is not set; cannot start.")
        sys.exit(1)

    # Restart loop: replaces the recursive main() call, which deepened the
    # call stack on every restart.
    while True:
        # Bound before the try so the except block can always test it.
        sms_sender = None
        try:
            logger.info("Starting the continuous scam detection process...")
            install_dependencies()
            try:
                sms_sender = SMSSender()
            except Exception as e:
                logger.error(f"Failed to initialize SMS sender: {str(e)}")
                sms_sender = None
            detector = ScamDetector(groq_api_key=groq_api_key, sms_sender=sms_sender)

            while True:  # Continuous scan loop
                run_log_handler = None
                try:
                    run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    run_log_handler = logging.FileHandler(f'scam_run_{run_timestamp}.log')
                    run_log_handler.setFormatter(
                        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
                    )
                    logger.addHandler(run_log_handler)

                    # Send the start notification only if SMS is available
                    start_message = f"Starting new scam detection scan at {run_timestamp}"
                    logger.info(start_message)
                    if sms_sender:
                        sms_sender.send_sms(
                            to_number="8140030507",
                            message=start_message
                        )

                    logger.info("Starting image scraping...")
                    image_urls = detector.scrape_images()
                    logger.info(f"\nFound {len(image_urls)} unique images")
                    detector.process_and_save(image_urls)
                    logger.info("\nResults saved!")
                    delay = 30
                except Exception as e:
                    error_msg = f"Error in detection run: {str(e)}"
                    logger.error(error_msg, exc_info=True)
                    delay = 300  # Wait 5 minutes before retrying
                finally:
                    # Always detach and close the per-run handler; otherwise a
                    # failed run leaks a file handle and later runs write
                    # duplicate lines into stale log files.
                    if run_log_handler is not None:
                        logger.removeHandler(run_log_handler)
                        run_log_handler.close()

                if delay == 30:
                    logger.info("Waiting 30 seconds before next scan...")
                time.sleep(delay)
        except Exception as e:
            critical_error = f"Critical error occurred: {str(e)}"
            logger.error(critical_error, exc_info=True)
            # Send the critical error notification only if SMS is available
            if sms_sender:
                sms_sender.send_sms(
                    to_number="8140030507",
                    message=f"CRITICAL ERROR: {str(e)[:100]}... System will restart in 1 minute."
                )
            time.sleep(60)  # Wait 1 minute before restarting setup


if __name__ == "__main__":
    main()