oscarwang2 committed on
Commit
0993713
1 Parent(s): 238a7dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -12
app.py CHANGED
@@ -1,11 +1,44 @@
1
  from flask import Flask, request, jsonify
2
  import requests
 
 
3
 
4
  app = Flask(__name__)
5
 
6
  # Define the SearXNG instance URL
7
  SEARXNG_INSTANCE_URL = "https://oscarwang2-searxng.hf.space/search"
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  @app.route('/search', methods=['GET'])
10
  def search():
11
  # Get the search term from query parameters
@@ -13,39 +46,49 @@ def search():
13
 
14
  if not search_term:
15
  return jsonify({'error': 'No search term provided'}), 400
16
-
17
  # Define the query parameters for the SearXNG API
18
  params = {
19
  'q': search_term,
20
  'format': 'json',
21
  'categories': 'general'
22
  }
23
-
24
  try:
25
  # Make the request to the SearXNG API
26
  response = requests.get(SEARXNG_INSTANCE_URL, params=params)
27
-
28
  # Check the response status code
29
  if response.status_code == 200:
30
  data = response.json()
31
- # Retrieve the first 3 snippets
32
  results = data.get('results', [])[:30]
33
- snippets = []
34
-
35
- # Collect the snippets
36
  for result in results:
37
  snippet = {
38
  'title': result.get('title', 'No title'),
39
  'snippet': result.get('content', 'No snippet available'),
40
  'url': result.get('url', 'No URL')
41
  }
42
- snippets.append(snippet)
43
-
44
- # Return the snippets as a JSON response
45
- return jsonify(snippets)
 
 
 
 
 
 
 
 
 
 
46
  else:
47
  return jsonify({'error': f'SearXNG API error: {response.status_code}'}), response.status_code
48
-
49
  except Exception as e:
50
  return jsonify({'error': str(e)}), 500
51
 
 
1
  from flask import Flask, request, jsonify
2
  import requests
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
 
6
  app = Flask(__name__)
7
 
8
  # Define the SearXNG instance URL
9
  SEARXNG_INSTANCE_URL = "https://oscarwang2-searxng.hf.space/search"
10
 
11
+ # Load the educational content classifier
12
+ tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/fineweb-edu-classifier")
13
+ model = AutoModelForSequenceClassification.from_pretrained("HuggingFaceTB/fineweb-edu-classifier")
14
+
15
def classify_educational_quality(text):
    """
    Classify the educational quality of a given text snippet.

    The text is tokenized with the module-level ``tokenizer`` and scored by
    the module-level ``model``. The model is expected to emit exactly one
    logit for the single input; that value is returned as a Python float.

    Args:
        text (str): Text snippet to classify

    Returns:
        float: Educational quality score, or 0.0 if classification fails
            for any reason (the error is printed, not raised).
    """
    try:
        # Prepare input for the model; truncation keeps long snippets within
        # the tokenizer's maximum input length.
        inputs = tokenizer(text, return_tensors="pt", padding="longest", truncation=True)

        # Inference only, so gradient tracking is disabled.
        with torch.no_grad():
            outputs = model(**inputs)

        # Tensor.item() extracts the lone scalar directly, so no NumPy
        # round-trip is needed and the call does not depend on the tensor
        # living on the CPU. It raises if there is more than one element,
        # which the handler below turns into the default score.
        return outputs.logits.squeeze(-1).float().item()
    except Exception as e:
        # Deliberately best-effort: one bad snippet must not fail the whole
        # search request, so report the problem and fall back to a default.
        print(f"Error in classification: {e}")
        return 0.0  # Default score (float, matching the success path)
41
+
42
  @app.route('/search', methods=['GET'])
43
  def search():
44
  # Get the search term from query parameters
 
46
 
47
  if not search_term:
48
  return jsonify({'error': 'No search term provided'}), 400
49
+
50
  # Define the query parameters for the SearXNG API
51
  params = {
52
  'q': search_term,
53
  'format': 'json',
54
  'categories': 'general'
55
  }
56
+
57
  try:
58
  # Make the request to the SearXNG API
59
  response = requests.get(SEARXNG_INSTANCE_URL, params=params)
60
+
61
  # Check the response status code
62
  if response.status_code == 200:
63
  data = response.json()
64
+ # Retrieve the first 30 results
65
  results = data.get('results', [])[:30]
66
+
67
+ # Classify and score educational quality for each result
68
+ scored_snippets = []
69
  for result in results:
70
  snippet = {
71
  'title': result.get('title', 'No title'),
72
  'snippet': result.get('content', 'No snippet available'),
73
  'url': result.get('url', 'No URL')
74
  }
75
+
76
+ # Combine title and snippet for classification
77
+ full_text = f"{snippet['title']} {snippet['snippet']}"
78
+
79
+ # Classify educational quality
80
+ edu_score = classify_educational_quality(full_text)
81
+
82
+ snippet['educational_score'] = edu_score
83
+ scored_snippets.append(snippet)
84
+
85
+ # Sort results by educational score in descending order
86
+ sorted_snippets = sorted(scored_snippets, key=lambda x: x['educational_score'], reverse=True)
87
+
88
+ return jsonify(sorted_snippets)
89
  else:
90
  return jsonify({'error': f'SearXNG API error: {response.status_code}'}), response.status_code
91
+
92
  except Exception as e:
93
  return jsonify({'error': str(e)}), 500
94