Spaces:
Runtime error
Runtime error
Commit
•
0eb5ff7
1
Parent(s):
113b487
Update app.py (#1)
Browse files
- Update app.py (0bbd778e1bfd900c60352f0c5a9c742cdbb4acc1)
Co-authored-by: Niklas Muennighoff <Muennighoff@users.noreply.huggingface.co>
app.py
CHANGED
@@ -17,10 +17,10 @@ text = """\
|
|
17 |
|
18 |
# Am I in The CommitPack?
|
19 |
|
20 |
-
As part of the BigCode project, we released and maintain [CommitPack](https://huggingface.co/datasets/bigcode/commitpack), a 4 TB dataset of permissively licensed
|
21 |
""" + """\
|
22 |
|
23 |
-
This tool lets you check if a repository under a given username is part of CommitPack dataset. Would you like to have your data removed from future versions of CommitPack?
|
24 |
"""
|
25 |
|
26 |
opt_out_text_template = """\
|
@@ -33,7 +33,7 @@ open an issue with <a href="https://github.com/bigcode-project/opt-out-v2/issues
|
|
33 |
|
34 |
opt_out_issue_title = """Opt-out request for {username}"""
|
35 |
opt_out_issue_body = """\
|
36 |
-
I request that the following data is removed from
|
37 |
|
38 |
- Commits
|
39 |
- GitHub issue
|
@@ -57,13 +57,13 @@ def check_username(username):
|
|
57 |
if username in usernames and len(username_to_repo[username]) > 0:
|
58 |
repos = username_to_repo[username]
|
59 |
repo_word = "repository" if len(repos) == 1 else "repositories"
|
60 |
-
output_md += f"**Yes**, there is code from **{len(repos)} {repo_word}** in
|
61 |
for repo in repos:
|
62 |
output_md += f"_{repo}_\n\n"
|
63 |
|
64 |
return output_md.strip(), issue_url(username, repos)
|
65 |
else:
|
66 |
-
output_md += "**No**, your code is not in
|
67 |
return output_md.strip(), ""
|
68 |
|
69 |
|
|
|
17 |
|
18 |
# Am I in The CommitPack?
|
19 |
|
20 |
+
As part of the BigCode project, we released and maintain [CommitPack](https://huggingface.co/datasets/bigcode/commitpack), a 4 TB dataset of permissively licensed Git commits covering 350 programming languages. One of our goals in this project is to give people agency over their source code by letting them decide whether or not it should be used to develop and evaluate machine learning models, as we acknowledge that not all developers may wish to have their data used for that purpose.
|
21 |
""" + """\
|
22 |
|
23 |
+
This tool lets you check if a repository under a given username is part of the CommitPack dataset. Would you like to have your data removed from future versions of CommitPack? The CommitPack uses the same opt-out as The Stack, so you can opt out by following the instructions [here](https://www.bigcode-project.org/docs/about/the-stack/#how-can-i-request-that-my-data-be-removed-from-the-stack).
|
24 |
"""
|
25 |
|
26 |
opt_out_text_template = """\
|
|
|
33 |
|
34 |
opt_out_issue_title = """Opt-out request for {username}"""
|
35 |
opt_out_issue_body = """\
|
36 |
+
I request that the following data is removed from the CommitPack:
|
37 |
|
38 |
- Commits
|
39 |
- GitHub issue
|
|
|
57 |
if username in usernames and len(username_to_repo[username]) > 0:
|
58 |
repos = username_to_repo[username]
|
59 |
repo_word = "repository" if len(repos) == 1 else "repositories"
|
60 |
+
output_md += f"**Yes**, there is code from **{len(repos)} {repo_word}** in the CommitPack:\n\n"
|
61 |
for repo in repos:
|
62 |
output_md += f"_{repo}_\n\n"
|
63 |
|
64 |
return output_md.strip(), issue_url(username, repos)
|
65 |
else:
|
66 |
+
output_md += "**No**, your code is not in the CommitPack."
|
67 |
return output_md.strip(), ""
|
68 |
|
69 |
|