Spaces:
Runtime error
Runtime error
take off dataset update
Browse files- app_dialogue.py +0 -57
app_dialogue.py
CHANGED
@@ -304,60 +304,6 @@ def model_inference(
|
|
304 |
print("Success - generated the following text:", acc_text)
|
305 |
print("-----")
|
306 |
|
307 |
-
|
308 |
-
def csv_to_hf_dataset(csv_file):
|
309 |
-
df = pd.read_csv(csv_file)
|
310 |
-
|
311 |
-
FEATURES = datasets.Features(
|
312 |
-
{
|
313 |
-
"images": datasets.Sequence(datasets.Image(decode=True)),
|
314 |
-
"conversation": [
|
315 |
-
{
|
316 |
-
"user": datasets.Value("string"),
|
317 |
-
"assistant": datasets.Value("string"),
|
318 |
-
}
|
319 |
-
],
|
320 |
-
}
|
321 |
-
)
|
322 |
-
|
323 |
-
def parse_and_download(data_row):
|
324 |
-
# Parse the JSON-like structure in the second column
|
325 |
-
discussion_data = json.loads(data_row[1].replace('""', '"'))
|
326 |
-
|
327 |
-
images = []
|
328 |
-
conversation = []
|
329 |
-
for entry in discussion_data:
|
330 |
-
if isinstance(entry[0], dict) and 'file' in entry[0]:
|
331 |
-
# Get images
|
332 |
-
image = load_image_from_url(entry[0]['file'])
|
333 |
-
images.append(image)
|
334 |
-
elif isinstance(entry, list):
|
335 |
-
# Get conversations
|
336 |
-
conversation.append({"user": entry[0], "assistant": entry[1]})
|
337 |
-
|
338 |
-
return images, conversation
|
339 |
-
|
340 |
-
|
341 |
-
# Apply parsing and downloading function
|
342 |
-
df['processed_data'] = df.apply(parse_and_download, axis=1)
|
343 |
-
|
344 |
-
# Create a Hugging Face dataset
|
345 |
-
data_dict = {
|
346 |
-
"images": df['processed_data'].apply(lambda x: x[0]),
|
347 |
-
"conversation": df['processed_data'].apply(lambda x: x[1])
|
348 |
-
}
|
349 |
-
|
350 |
-
dataset = datasets.Dataset.from_dict(data_dict, features=FEATURES)
|
351 |
-
return dataset
|
352 |
-
|
353 |
-
|
354 |
-
def update_dope_problematic_dataset_fn():
|
355 |
-
dope_dataset = csv_to_hf_dataset("gradio_dope_data_points/log.csv")
|
356 |
-
dope_dataset.push_to_hub("HuggingFaceM4/dope_chatty_dataset", private=True)
|
357 |
-
problematic_dataset = csv_to_hf_dataset("gradio_problematic_data_points/log.csv")
|
358 |
-
problematic_dataset.push_to_hub("HuggingFaceM4/problematic_chatty_dataset", private=True)
|
359 |
-
|
360 |
-
|
361 |
# Hyper-parameters for generation
|
362 |
max_new_tokens = gr.Slider(
|
363 |
minimum=8,
|
@@ -535,8 +481,5 @@ with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {hei
|
|
535 |
None,
|
536 |
preprocess=False,
|
537 |
)
|
538 |
-
update_dope_problematic_dataset.click(
|
539 |
-
fn=update_dope_problematic_dataset_fn,
|
540 |
-
)
|
541 |
|
542 |
demo.launch()
|
|
|
304 |
print("Success - generated the following text:", acc_text)
|
305 |
print("-----")
|
306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
307 |
# Hyper-parameters for generation
|
308 |
max_new_tokens = gr.Slider(
|
309 |
minimum=8,
|
|
|
481 |
None,
|
482 |
preprocess=False,
|
483 |
)
|
|
|
|
|
|
|
484 |
|
485 |
demo.launch()
|