"""Flask service that re-ranks SearXNG search results by educational score.

The single ``/search`` endpoint forwards the ``q`` query parameter to a
SearXNG instance, scores the title and snippet of each returned result with
the ``HuggingFaceTB/fineweb-edu-classifier`` model, and returns the results
as JSON sorted from highest to lowest score.
"""

import logging
import os
from operator import itemgetter

import requests
import torch
from flask import Flask, jsonify, request
from transformers import AutoModelForSequenceClassification, AutoTokenizer

logger = logging.getLogger(__name__)

app = Flask(__name__)

# Define the SearXNG instance URL
SEARXNG_INSTANCE_URL = "https://oscarwang2-searxng.hf.space/search"

# Upper bound on how long one upstream search may take. Without a timeout,
# requests waits indefinitely and a stalled upstream would hang the worker.
SEARXNG_TIMEOUT_SECONDS = 10

# Only this many leading results are classified and returned.
MAX_RESULTS = 30

# Load the educational content classifier once, at import time, so every
# request reuses the same tokenizer and model.
CLASSIFIER_NAME = "HuggingFaceTB/fineweb-edu-classifier"
tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_NAME)
model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_NAME)


def classify_educational_quality(text):
    """
    Classify the educational quality of a given text snippet.

    Args:
        text (str): Text snippet to classify

    Returns:
        float: Educational quality score taken from the model's logit, or
            0.0 if classification fails for any reason.
    """
    try:
        # Prepare input for the model; truncation keeps over-long text
        # within the tokenizer's maximum length.
        inputs = tokenizer(
            text, return_tensors="pt", padding="longest", truncation=True
        )

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)

        # .item() requires exactly one value; if the model ever produced
        # several logits this raises and the fallback below applies.
        return outputs.logits.squeeze(-1).float().item()
    except Exception:
        # Best-effort scoring: one bad snippet must not fail the whole
        # search, so log the traceback and fall back to a neutral score.
        logger.exception("Error in classification")
        return 0.0  # Default score if classification fails


def _score_result(result):
    """Build the response entry for one SearXNG result, including its score."""
    entry = {
        'title': result.get('title', 'No title'),
        'snippet': result.get('content', 'No snippet available'),
        'url': result.get('url', 'No URL'),
    }
    # Combine title and snippet for classification
    full_text = f"{entry['title']} {entry['snippet']}"
    entry['educational_score'] = classify_educational_quality(full_text)
    return entry


@app.route('/search', methods=['GET'])
def search():
    """
    Search SearXNG and return results ranked by educational score.

    Query parameters:
        q: The search term (required; blank or whitespace-only is rejected).

    Returns:
        A JSON list of objects with ``title``, ``snippet``, ``url`` and
        ``educational_score`` keys, sorted by score in descending order.
        On failure, a JSON object with an ``error`` key and status 400
        (missing search term), the upstream status code (SearXNG answered
        with a non-200 status), or 500 (any other error).
    """
    # Get the search term from query parameters; strip so that a
    # whitespace-only query is treated as missing.
    search_term = request.args.get('q', '').strip()
    if not search_term:
        return jsonify({'error': 'No search term provided'}), 400

    # Define the query parameters for the SearXNG API
    params = {
        'q': search_term,
        'format': 'json',
        'categories': 'general',
    }

    try:
        # Make the request to the SearXNG API
        response = requests.get(
            SEARXNG_INSTANCE_URL,
            params=params,
            timeout=SEARXNG_TIMEOUT_SECONDS,
        )

        # Pass upstream failures through with their original status code.
        if response.status_code != 200:
            return (
                jsonify({'error': f'SearXNG API error: {response.status_code}'}),
                response.status_code,
            )

        # Retrieve the first MAX_RESULTS results and score each one.
        results = response.json().get('results', [])[:MAX_RESULTS]
        scored_snippets = [_score_result(result) for result in results]

        # Sort results by educational score in descending order
        # (stable: ties keep the upstream ordering).
        sorted_snippets = sorted(
            scored_snippets,
            key=itemgetter('educational_score'),
            reverse=True,
        )
        return jsonify(sorted_snippets)
    except Exception as e:
        # Route-level boundary: report the failure to the client as JSON
        # instead of an HTML error page, and keep the traceback in the log.
        logger.exception("Search request failed")
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    # The Werkzeug debugger allows arbitrary code execution, so it must not
    # be enabled unconditionally on a server bound to all interfaces.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in {'1', 'true', 'yes'}

    # Run the Flask app on port 7860
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)