kcelia committed on
Commit
dc83cd7
1 Parent(s): 67fa189

chore: fix chatgpt step

Files changed (1)
  1. app.py +21 -21
app.py CHANGED
@@ -132,7 +132,7 @@ def encrypt_query_fn(query):
 
         encrypted_tokens.append(encrypted_x)
 
-    print(f"Data encrypted ✅ on Client Side")
+    print("Data encrypted ✅ on Client Side")
 
     assert len({len(token) for token in encrypted_tokens}) == 1
 
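For context on the loop this hunk logs from, here is a minimal sketch of the client-side encryption step, assuming the app follows Concrete ML's `FHEModelClient` deployment API; the paths, the `encrypt_tokens` helper and the shape of the token embeddings are not shown in this diff and are assumptions.

```python
# Hypothetical sketch, not the app's exact code: each token embedding is quantized,
# encrypted and serialized on the client side with Concrete ML's FHEModelClient.
import numpy as np
from concrete.ml.deployment import FHEModelClient

fhe_client = FHEModelClient(path_dir="deployment", key_dir="keys")  # assumed paths
fhe_client.generate_private_and_evaluation_keys()

def encrypt_tokens(token_embeddings: list[np.ndarray]) -> list[bytes]:
    encrypted_tokens = []
    for x in token_embeddings:
        # quantize, encrypt and serialize one token embedding at a time
        encrypted_x = fhe_client.quantize_encrypt_serialize(x.reshape(1, -1))
        encrypted_tokens.append(encrypted_x)
    print("Data encrypted ✅ on Client Side")
    # every serialized ciphertext is expected to have the same byte length
    assert len({len(token) for token in encrypted_tokens}) == 1
    return encrypted_tokens
```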
@@ -355,7 +355,7 @@ def decrypt_fn(text) -> Dict:
     else:
         identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
 
-    print(f"Decryption done ✅ on Client Side")
+    print("Decryption done ✅ on Client Side")
 
     return anonymized_text, identified_df
 
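The `else` branch above keeps the two-column layout even when nothing is identified. A small illustrative sketch of how that DataFrame might be built; `identified_words_with_prob` is a hypothetical input, not a name from the app.

```python
import pandas as pd

def build_identified_df(identified_words_with_prob):
    # identified_words_with_prob: hypothetical list of (word, probability) pairs
    if identified_words_with_prob:
        identified_df = pd.DataFrame(
            identified_words_with_prob, columns=["Identified Words", "Probability"]
        )
    else:
        # keep the same column layout when no PII was detected
        identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
    print("Decryption done ✅ on Client Side")
    return identified_df
```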
 
@@ -380,25 +380,20 @@ def anonymization_with_fn(query):
 
 def query_chatgpt_fn(anonymized_query, anonymized_document):
 
-    evaluation_key_path = KEYS_DIR / "evaluation_key"
-    if not evaluation_key_path.is_file():
+    print("------------ Step 5: ChatGPT communication")
+
+    if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
         error_message = "Error ❌: Please generate the key first!"
-        return {anonymized_text_output: gr.update(value=error_message)}
+        return {chatgpt_response_anonymized: gr.update(value=error_message)}
 
-    encryted_query_path = KEYS_DIR / "encrypted_quantized_query"
-    if not encryted_query_path.is_file():
+    if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
         error_message = "Error ❌: Please encrypt your query first!"
-        return {anonymized_text_output: gr.update(value=error_message)}
-
-    decrypted_query_path = KEYS_DIR / "reconstructed_sentence"
-    if not decrypted_query_path.is_file():
-        error_message = "Error ❌: Please run the FHE computation first!"
-        return {anonymized_text_output: gr.update(value=error_message)}
+        return {chatgpt_response_anonymized: gr.update(value=error_message)}
 
     prompt = read_txt(PROMPT_PATH)
 
     # Prepare prompt
-    full_prompt = prompt + "\n"
+    initial_prompt = prompt + "\n"
     query = (
         "Document content:\n```\n"
         + anonymized_document
@@ -407,7 +402,7 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
         + anonymized_query
         + "\n```"
     )
-    print(full_prompt)
+    print(f'initial_prompt:\n{initial_prompt}')
 
     completion = client.chat.completions.create(
         model="gpt-4-1106-preview",  # Replace with "gpt-4" if available
@@ -438,7 +433,9 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
         else:
             processed_tokens.append(token)
     deanonymized_response = "".join(processed_tokens)
-    return anonymized_response, deanonymized_response
+
+    return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
+            chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
 
 
 demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
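The new `return` relies on Gradio's dict-style outputs: when an event's `outputs` are declared as a list of components, the callback may return a dict keyed by those components and update only some of them, which is what lets the error guards above short-circuit with a single message. A self-contained sketch with illustrative component names (not the app's exact ones):

```python
import gradio as gr

def answer_fn(question):
    if not question.strip():
        # update only the first output and leave the other untouched
        return {anonymized_box: gr.update(value="Error ❌: please type a question")}
    return {
        anonymized_box: gr.update(value=f"(anonymized) {question}"),
        deanonymized_box: gr.update(value=f"(de-anonymized) {question}"),
    }

with gr.Blocks() as sketch:
    question = gr.Textbox(label="Question")
    anonymized_box = gr.Textbox(label="Anonymized answer")
    deanonymized_box = gr.Textbox(label="De-anonymized answer")
    gr.Button("Ask").click(answer_fn, inputs=[question],
                           outputs=[anonymized_box, deanonymized_box])

sketch.launch()
```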
@@ -473,8 +470,7 @@ with demo:
 
     with gr.Accordion("What is encrypted anonymization?", open=False):
         gr.Markdown(
-            """
-            Anonymization is the process of removing personally identifiable information (PII)
+            """Anonymization is the process of removing personally identifiable information (PII)
             from data to protect individual privacy.
 
             To resolve trust issues when deploying anonymization as a cloud service, Fully Homomorphic
@@ -507,13 +503,16 @@ with demo:
 
     ########################## Main document Part ##########################
 
+    gr.Markdown("<hr />")
     gr.Markdown("## Step 2: Private document")
 
     with gr.Row():
         with gr.Column():
             gr.Markdown("**Original document:**")
             gr.Markdown(
-                """This document was retrieved from the [Microsoft Presidio](https://huggingface.co/spaces/presidio/presidio_demo) demo.\n\n
+                """This document was retrieved from the
+                [Microsoft Presidio](https://huggingface.co/spaces/presidio/presidio_demo) demo.
+
                 You can select and deselect sentences to customize the document that will be used
                 as the initial prompt for ChatGPT in step 5.
                 """
@@ -522,7 +521,7 @@ with demo:
             gr.Markdown("**Anonymized document:**")
             gr.Markdown(
                 """You can see below the anonymized text, replaced with hexademical strings, that
-                will be sent to ChatGPT.
+                will be sent to ChatGPT.
 
                 ChatGPT will then be able to answer any queries about the document.
                 """
@@ -554,7 +553,8 @@ with demo:
 
     gr.Markdown(
         """Now, you can formulate a query. Please choose from the predefined options in
-        Queries examples or craft a custom question in the “Customized query” text box.
+        <span style='color:grey'>“Queries examples”</span>" or craft a custom question in
+        the <span style='color:grey'>“Customized query”</span>" text box.
 
         Remain concise and relevant to the context. Any off-topic query will not be processed.
        """
 