presidio committed on
Commit
7ba5fd9
1 Parent(s): 0a29cca

fake_data_generator.py

Browse files
Files changed (1) hide show
  1. openai_fake_data_generator.py +52 -0
openai_fake_data_generator.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ from typing import List
3
+ from presidio_analyzer import RecognizerResult
4
+ from presidio_anonymizer import AnonymizerEngine
5
+
6
+
7
def set_openai_key(openai_key: str) -> None:
    """Set the OpenAI API key.

    Stores the given key on the ``openai`` module's ``api_key`` attribute.

    :param openai_key: the open AI key (https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key)
    """
    # The parameter is annotated with the builtin ``str``; the name ``string``
    # is not defined in this module and would raise NameError at definition.
    openai.api_key = openai_key
12
+
13
+
14
def call_completion_model(prompt: str, model: str = "text-davinci-003", max_tokens: int = 512) -> str:
    """Create a request for the OpenAI Completion service and return the response text.

    :param prompt: The prompt for the completion model
    :param model: OpenAI model name
    :param max_tokens: Value forwarded as the request's ``max_tokens`` argument
    :return: The ``text`` attribute of the first entry in the response's ``choices``
    """
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
    )

    # Only the first choice of the response is used.
    return response["choices"][0].text
29
+
30
+
31
def create_prompt(anonymized_text: str) -> str:
    """
    Create the prompt with instructions to GPT-3.

    The prompt consists of a task description, a short list of instructions,
    one worked input/output example, and finally the caller's text followed by
    an open ``output:`` line.

    :param anonymized_text: Text with placeholders instead of PII values, e.g. My name is <PERSON>.
    :return: The full prompt text, starting and ending with a newline
    """
    # The prompt is assembled line by line so that its whitespace does not
    # depend on how this function body is indented. Only the caller's text is
    # interpolated; every other entry is a plain string, so braces such as
    # {DATE} appear literally in the prompt.
    prompt_lines = [
        "",
        "Your role is to create synthetic text based on de-identified text "
        "with placeholders instead of personally identifiable information.",
        # The example list names a <PERSON> placeholder (the style used in the
        # docstring above) instead of leaving empty entries.
        "Replace the placeholders (e.g. <PERSON>, {DATE}, {ip_address}) with fake values.",
        "",
        "Instructions:",
        "",
        "Use completely random numbers, so every digit is drawn between 0 and 9.",
        "Use realistic names that come from diverse genders, ethnicities and countries.",
        "If there are no placeholders, return the text as is and provide an answer.",
        "input: How do I change the limit on my credit card {credit_card_number}?",
        "output: How do I change the limit on my credit card 2539 3519 2345 1555?",
        f"input: {anonymized_text}",
        "output:",
        "",
    ]
    return "\n".join(prompt_lines)