cakiki committed on
Commit
8b3e201
•
1 Parent(s): 68b0ce4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -19,15 +19,15 @@ def mark_tokens_bold(string, tokens):
19
  return string
20
 
21
 
22
- def process_results(results):
23
  if len(results) == 0:
24
  return """<br><p>No results retrieved.</p><br><hr>"""
25
 
26
  results_html = ""
27
  for result in results:
28
  text_html = result["text"]
29
- # text_html = mark_tokens_bold(text_html, highlight_terms)
30
- repository = '<a href=\"https://github.com/' + result['repository'] + '>' + result['repository'] + '</a>'
31
  license = result["license"]
32
  language = result["language"]
33
  code_height = min(600, len(text_html.split('\n')) * 20) # limit to maximum height of 600px
@@ -64,7 +64,7 @@ def search(query, num_results=10):
64
  else:
65
  response = match_query(query, num_results=num_results)
66
  results = [{"text": nh3.clean(html.escape(hit.content)), "repository": f"{hit.repository}/{hit.path}", "license": hit.license[0], "language": hit.language} for hit in response]
67
- return process_results(results)
68
 
69
  description = """# <p style="text-align: center;"><span style='color: #e6b800;'>StarCoder:</span> Dataset Search 🔍 </p>
70
  <span>When using <a href="https://huggingface.co/bigcode/large-model" style="color: #e6b800;">StarCoder</a> to generate code, it might produce exact copies of code in the pretraining dataset. \
 
19
  return string
20
 
21
 
22
+ def process_results(results, query):
23
  if len(results) == 0:
24
  return """<br><p>No results retrieved.</p><br><hr>"""
25
 
26
  results_html = ""
27
  for result in results:
28
  text_html = result["text"]
29
+ text_html = mark_tokens_bold(text_html, query.split(" "))
30
+ repository = result['repository']
31
  license = result["license"]
32
  language = result["language"]
33
  code_height = min(600, len(text_html.split('\n')) * 20) # limit to maximum height of 600px
 
64
  else:
65
  response = match_query(query, num_results=num_results)
66
  results = [{"text": nh3.clean(html.escape(hit.content)), "repository": f"{hit.repository}/{hit.path}", "license": hit.license[0], "language": hit.language} for hit in response]
67
+ return process_results(results, query)
68
 
69
  description = """# <p style="text-align: center;"><span style='color: #e6b800;'>StarCoder:</span> Dataset Search 🔍 </p>
70
  <span>When using <a href="https://huggingface.co/bigcode/large-model" style="color: #e6b800;">StarCoder</a> to generate code, it might produce exact copies of code in the pretraining dataset. \