# full-search-api / app.py
# oscarwang2's picture
# Update app.py
# 0993713 verified
# raw
# history blame
# 3.41 kB
from flask import Flask, request, jsonify
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Flask application object; the route decorator in this file registers on it.
app = Flask(__name__)

# Endpoint of the SearXNG instance that is queried for search results.
SEARXNG_INSTANCE_URL = "https://oscarwang2-searxng.hf.space/search"

# The tokenizer and the classifier weights come from the same checkpoint, so
# the identifier is spelled once. Both are loaded at import time and reused
# by every call to the classifier.
_EDU_CLASSIFIER_ID = "HuggingFaceTB/fineweb-edu-classifier"
tokenizer = AutoTokenizer.from_pretrained(_EDU_CLASSIFIER_ID)
model = AutoModelForSequenceClassification.from_pretrained(_EDU_CLASSIFIER_ID)
def classify_educational_quality(text):
    """
    Classify the educational quality of a given text snippet.

    The text is tokenized with the module-level ``tokenizer`` (truncated to
    the tokenizer's maximum length) and passed through the module-level
    ``model``; the single logit the model emits is returned as the score.

    Args:
        text (str): Text snippet to classify

    Returns:
        float: Educational quality score, or 0.0 if classification fails
        for any reason (the error is printed, not raised).
    """
    try:
        # Prepare input for the model
        inputs = tokenizer(text, return_tensors="pt", padding="longest", truncation=True)

        # Inference only: no gradients are needed, so skip building the graph
        with torch.no_grad():
            outputs = model(**inputs)

        # Drop the trailing logit axis and read the one remaining value as a
        # Python float; .item() raises if more than one element is present.
        return outputs.logits.squeeze(-1).float().item()
    except Exception as e:
        # Best-effort: a failed classification must not break the caller,
        # so report it and fall back to a neutral score.
        print(f"Error in classification: {e}")
        return 0.0  # Default score if classification fails (float, as documented)
@app.route('/search', methods=['GET'])
def search():
    """
    Search SearXNG and return the results ranked by educational score.

    Query parameters:
        q: The search term (required).

    Returns:
        On success, a JSON array of objects with the keys ``title``,
        ``snippet``, ``url`` and ``educational_score``, sorted by
        ``educational_score`` in descending order (at most 30 entries).
        On failure, a JSON object with an ``error`` key and status 400 when
        ``q`` is missing, the upstream status code when SearXNG does not
        answer with 200, or 500 for any other error (including a timeout).
    """
    # Get the search term from query parameters
    search_term = request.args.get('q', '')
    if not search_term:
        return jsonify({'error': 'No search term provided'}), 400

    # Define the query parameters for the SearXNG API
    params = {
        'q': search_term,
        'format': 'json',
        'categories': 'general'
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # upstream and ties up this worker; bound the wait instead.
        response = requests.get(SEARXNG_INSTANCE_URL, params=params, timeout=30)

        # Relay upstream failures with the upstream status code
        if response.status_code != 200:
            return jsonify({'error': f'SearXNG API error: {response.status_code}'}), response.status_code

        data = response.json()

        # Retrieve the first 30 results; `or []` also covers an explicit null
        results = (data.get('results') or [])[:30]

        # Classify and score educational quality for each result
        scored_snippets = []
        for result in results:
            # `or` (rather than a .get default) also replaces fields that are
            # present but null/empty, so the classifier never sees "None".
            snippet = {
                'title': result.get('title') or 'No title',
                'snippet': result.get('content') or 'No snippet available',
                'url': result.get('url') or 'No URL'
            }

            # Combine title and snippet for classification
            full_text = f"{snippet['title']} {snippet['snippet']}"
            snippet['educational_score'] = classify_educational_quality(full_text)
            scored_snippets.append(snippet)

        # Sort results by educational score in descending order
        scored_snippets.sort(key=lambda item: item['educational_score'], reverse=True)
        return jsonify(scored_snippets)
    except Exception as e:
        # Route-level boundary: report any failure as a JSON 500 response
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    import os

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port execute
    # code. Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')

    # Run the Flask app on port 7860
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)