Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,15 +19,15 @@ def mark_tokens_bold(string, tokens):
|
|
19 |
return string
|
20 |
|
21 |
|
22 |
-
def process_results(results):
|
23 |
if len(results) == 0:
|
24 |
return """<br><p>No results retrieved.</p><br><hr>"""
|
25 |
|
26 |
results_html = ""
|
27 |
for result in results:
|
28 |
text_html = result["text"]
|
29 |
-
|
30 |
-
repository =
|
31 |
license = result["license"]
|
32 |
language = result["language"]
|
33 |
code_height = min(600, len(text_html.split('\n')) * 20) # limit to maximum height of 600px
|
@@ -64,7 +64,7 @@ def search(query, num_results=10):
|
|
64 |
else:
|
65 |
response = match_query(query, num_results=num_results)
|
66 |
results = [{"text": nh3.clean(html.escape(hit.content)), "repository": f"{hit.repository}/{hit.path}", "license": hit.license[0], "language": hit.language} for hit in response]
|
67 |
-
return process_results(results)
|
68 |
|
69 |
description = """# <p style="text-align: center;"><span style='color: #e6b800;'>StarCoder:</span> Dataset Search π </p>
|
70 |
<span>When using <a href="https://huggingface.co/bigcode/large-model" style="color: #e6b800;">StarCoder</a> to generate code, it might produce exact copies of code in the pretraining dataset. \
|
|
|
19 |
return string
|
20 |
|
21 |
|
22 |
+
def process_results(results, query):
|
23 |
if len(results) == 0:
|
24 |
return """<br><p>No results retrieved.</p><br><hr>"""
|
25 |
|
26 |
results_html = ""
|
27 |
for result in results:
|
28 |
text_html = result["text"]
|
29 |
+
text_html = mark_tokens_bold(text_html, query.split(" "))
|
30 |
+
repository = result['repository']
|
31 |
license = result["license"]
|
32 |
language = result["language"]
|
33 |
code_height = min(600, len(text_html.split('\n')) * 20) # limit to maximum height of 600px
|
|
|
64 |
else:
|
65 |
response = match_query(query, num_results=num_results)
|
66 |
results = [{"text": nh3.clean(html.escape(hit.content)), "repository": f"{hit.repository}/{hit.path}", "license": hit.license[0], "language": hit.language} for hit in response]
|
67 |
+
return process_results(results, query)
|
68 |
|
69 |
description = """# <p style="text-align: center;"><span style='color: #e6b800;'>StarCoder:</span> Dataset Search π </p>
|
70 |
<span>When using <a href="https://huggingface.co/bigcode/large-model" style="color: #e6b800;">StarCoder</a> to generate code, it might produce exact copies of code in the pretraining dataset. \
|