# Hugging Face Space (status shown when this file was captured: Sleeping)
import logging

import requests
import torch
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Flask application object for this service.
app = Flask(__name__)

# URL of the SearXNG instance that search queries are sent to.
SEARXNG_INSTANCE_URL = "https://oscarwang2-searxng.hf.space/search"

# Educational content classifier: tokenizer and sequence-classification model
# are both loaded from the same checkpoint, once, when the module is imported.
_EDU_CLASSIFIER_ID = "HuggingFaceTB/fineweb-edu-classifier"
tokenizer = AutoTokenizer.from_pretrained(_EDU_CLASSIFIER_ID)
model = AutoModelForSequenceClassification.from_pretrained(_EDU_CLASSIFIER_ID)
def classify_educational_quality(text):
    """
    Score how educational a piece of text is.

    The text is tokenized, passed through the module-level classifier
    without gradient tracking, and the model's single output logit is
    returned as a plain Python number.

    Args:
        text (str): Text snippet to classify

    Returns:
        float: Educational quality score; 0 if anything goes wrong while
        tokenizing or running the model (the error is printed).
    """
    try:
        # Tokenize into PyTorch tensors; over-long input is truncated.
        encoded = tokenizer(text, return_tensors="pt", padding="longest", truncation=True)

        # Inference only, so no gradients are needed.
        with torch.no_grad():
            prediction = model(**encoded)

        # Drop the trailing dimension and pull out the one scalar value.
        raw_scores = prediction.logits.squeeze(-1).float().detach().numpy()
        return raw_scores.item()
    except Exception as exc:
        # Best effort: a failed classification must not break the caller.
        print(f"Error in classification: {exc}")
        return 0
# Upper bound, in seconds, for one request to SearXNG. Without a timeout,
# `requests.get` can block indefinitely if the upstream stalls.
_SEARXNG_TIMEOUT_SECONDS = 10

_logger = logging.getLogger(__name__)


# NOTE(review): no route registration for this view is visible in the file, so
# without the decorator the app would serve no endpoint. The '/search' path is
# an assumption (it mirrors the upstream SearXNG path) -- confirm with clients.
@app.route('/search', methods=['GET'])
def search():
    """Query SearXNG and return the results ranked by educational score.

    The search term is read from the ``q`` query parameter of the current
    request. The first 30 SearXNG results are each scored with
    ``classify_educational_quality`` (on "<title> <snippet>") and returned
    sorted by that score, highest first.

    Returns:
        A JSON response, optionally paired with an HTTP status code:

        * ``{'error': ...}`` with 400 when ``q`` is missing or empty;
        * ``{'error': ...}`` with the upstream status code when SearXNG
          answers with anything other than 200;
        * ``{'error': ...}`` with 500 when any exception is raised (including
          a timeout or an invalid JSON body from SearXNG);
        * otherwise a JSON list of objects with the keys ``title``,
          ``snippet``, ``url`` and ``educational_score``.
    """
    # Get the search term from query parameters
    search_term = request.args.get('q', '')
    if not search_term:
        return jsonify({'error': 'No search term provided'}), 400

    # Query parameters for the SearXNG API
    params = {
        'q': search_term,
        'format': 'json',
        'categories': 'general',
    }

    try:
        response = requests.get(
            SEARXNG_INSTANCE_URL,
            params=params,
            timeout=_SEARXNG_TIMEOUT_SECONDS,
        )

        # Pass upstream failures through with their original status code.
        if response.status_code != 200:
            return jsonify({'error': f'SearXNG API error: {response.status_code}'}), response.status_code

        # Only the first 30 results are scored and returned.
        results = response.json().get('results', [])[:30]

        scored_snippets = []
        for result in results:
            snippet = {
                'title': result.get('title', 'No title'),
                'snippet': result.get('content', 'No snippet available'),
                'url': result.get('url', 'No URL'),
            }
            # Title and snippet are classified together as one text.
            snippet['educational_score'] = classify_educational_quality(
                f"{snippet['title']} {snippet['snippet']}"
            )
            scored_snippets.append(snippet)

        # Highest educational score first; the sort is stable, so results
        # with equal scores keep their upstream order.
        scored_snippets.sort(key=lambda item: item['educational_score'], reverse=True)
        return jsonify(scored_snippets)
    except Exception as e:
        # Request boundary: record the traceback server-side, then report the
        # failure to the client instead of letting the exception propagate.
        _logger.exception("Search request failed")
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Run the Flask development server on all interfaces, port 7860.
    #
    # Debug mode is deliberately NOT forced on here: with debug enabled the
    # Werkzeug interactive debugger lets a client execute arbitrary code, and
    # host='0.0.0.0' makes the server reachable from other machines. For local
    # development it can still be switched on through Flask's own
    # configuration (e.g. the FLASK_DEBUG environment variable).
    app.run(host='0.0.0.0', port=7860)