static-rlhf-interface / collect.py
Tristan Thrush
first commit
dbe89c5
raw
history blame
4.43 kB
import argparse
from os import path
import boto3
from boto.mturk.question import ExternalQuestion
from config import MTURK_KEY, MTURK_SECRET
parser = argparse.ArgumentParser()
parser.add_argument("--mturk_region", default="us-east-1", help="The region for mturk (default: us-east-1)")
parser.add_argument(
"--space_name",
default="Tristan/static-rlhf-interface",
help="Name of the accompanying Hugging Face space (default: huggingface/rlhf-interface)",
)
parser.add_argument("--num_hits", type=int, default=5, help="The number of HITs.")
parser.add_argument(
"--num_assignments", type=int, default=1, help="The number of times that the HIT can be accepted and completed."
)
parser.add_argument(
"--live_mode",
action="store_true",
help="""
Whether to run in live mode with real turkers. This will charge your account money.
If you don't use this flag, the HITs will be deployed on the sandbox version of mturk,
which will not charge your account money.
""",
)
parser.add_argument(
"--refresh_qualification_test",
action="store_true",
help="""
Whether to refresh the qualification test. If you've made edits to the test
xml files, it is necessary to do this.
""",
)
parser.add_argument(
"--custom_qualification_test",
action="store_true",
help="""
Whether to require the custom qualification test.
""",
)
parser.add_argument(
"--master_turkers",
action="store_true",
help="""
Whether to only use turkers with the master qualification.
""",
)
parser.add_argument(
"--us_turkers",
action="store_true",
help="""
Whether to only use US-based turkers.
""",
)
args = parser.parse_args()
MTURK_URL = f"https://mturk-requester{'' if args.live_mode else '-sandbox'}.{args.mturk_region}.amazonaws.com"
mturk = boto3.client(
"mturk",
aws_access_key_id=MTURK_KEY,
aws_secret_access_key=MTURK_SECRET,
region_name=args.mturk_region,
endpoint_url=MTURK_URL,
)
# This is the URL that makes the space embeddable in an mturk iframe
question = ExternalQuestion(f"https://hf.space/embed/{args.space_name}/+?__theme=light", frame_height=600)
qualification_requirements=[]
if args.master_turkers:
qualification_requirements.append({
QualificationTypeId: '2F1QJWKUDD8XADTFD2Q0G6UTO95ALH',
Comparator: 'Exists'
})
if args.us_turkers:
qualification_requirements.append({
QualificationTypeId: '00000000000000000071',
Comparator: 'In',
LocaleValues: [
{ Country: "US" },
]
})
if args.custom_qualification_test:
qualification_type_id = (
open("qualification_type_id.txt", "r").read() if path.exists("qualification_type_id.txt") else None
)
if args.refresh_qualification_test or qualification_type_id is None:
if qualification_type_id is not None:
mturk.delete_qualification_type(QualificationTypeId=qualification_type_id)
response = mturk.create_qualification_type(
Name="rlhf--qualification",
Keywords="RLHF qualification",
Description="Qualification test for RLHF task.",
QualificationTypeStatus="Active",
Test=open("qualification_questions.xml", mode="r").read(),
AnswerKey=open("qualification_answers.xml", mode="r").read(),
TestDurationInSeconds=3600,
AutoGranted=False,
)
qualification_type_id = response["QualificationType"]["QualificationTypeId"]
open("qualification_type_id.txt", "w+").write(qualification_type_id)
qualification_requirements.append({
"QualificationTypeId": qualification_type_id,
"Comparator": "Exists",
"RequiredToPreview": False,
"ActionsGuarded": "Accept",
})
for i in range(args.num_hits):
new_hit = mturk.create_hit(
Title="RLHF HIT",
Description="Interact with an AI",
Keywords="chatbot",
Reward="0.25",
MaxAssignments=args.num_assignments,
LifetimeInSeconds=172800,
AssignmentDurationInSeconds=600,
AutoApprovalDelayInSeconds=14400,
Question=question.get_as_xml(),
QualificationRequirements=qualification_requirements,
)
print(
f"HIT Group Link: https://worker{'' if args.live_mode else 'sandbox'}.mturk.com/mturk/preview?groupId="
+ new_hit["HIT"]["HITGroupId"]
)