Spaces:
Running
Running
fake_data_generator.py
Browse files
openai_fake_data_generator.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List

import openai
from presidio_analyzer import RecognizerResult
from presidio_anonymizer import AnonymizerEngine
|
5 |
+
|
6 |
+
|
7 |
+
def set_openai_key(openai_key: str) -> None:
    """Set the OpenAI API key.

    Stores the key on the ``openai`` module (``openai.api_key``), so it is a
    process-wide setting rather than a per-call argument.

    :param openai_key: the open AI key (https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key)
    """
    # The annotation must be ``str``: the bare name ``string`` is not defined
    # in this module and raises NameError as soon as the ``def`` is evaluated.
    openai.api_key = openai_key
|
12 |
+
|
13 |
+
|
14 |
+
def call_completion_model(
    prompt: str, model: str = "text-davinci-003", max_tokens: int = 512
) -> str:
    """Create a request for the OpenAI Completion service and return the response.

    :param prompt: The prompt for the completion model
    :param model: OpenAI model name
    :param max_tokens: Value forwarded as the request's ``max_tokens`` argument
    :return: The ``text`` attribute of the first entry in the response's
        ``choices`` list
    """
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
    )

    # Only the first choice is returned; any further choices are ignored.
    return response["choices"][0].text
|
29 |
+
|
30 |
+
|
31 |
+
def create_prompt(anonymized_text: str) -> str:
    """Create the prompt with instructions to GPT-3.

    The prompt consists of a role description, a short list of instructions,
    one worked input/output example, and finally the caller's text as the
    last ``input:`` followed by an open ``output:`` for the model to complete.

    :param anonymized_text: Text with placeholders instead of PII values, e.g. My name is <PERSON>.
    :return: The full prompt text with ``anonymized_text`` embedded in it
    """
    # Doubled braces are f-string escapes: ``{{DATE}}`` is emitted as the
    # literal ``{DATE}``. Only ``{anonymized_text}`` is interpolated.
    # The example list names an angle-bracket placeholder (<PERSON>) because
    # that is the format the docstring above describes for the input text.
    prompt = f"""
    Your role is to create synthetic text based on de-identified text with placeholders instead of personally identifiable information.
    Replace the placeholders (e.g. <PERSON>, {{DATE}}, {{ip_address}}) with fake values.

    Instructions:

    Use completely random numbers, so every digit is drawn between 0 and 9.
    Use realistic names that come from diverse genders, ethnicities and countries.
    If there are no placeholders, return the text as is and provide an answer.
    input: How do I change the limit on my credit card {{credit_card_number}}?
    output: How do I change the limit on my credit card 2539 3519 2345 1555?
    input: {anonymized_text}
    output:
    """
    return prompt
|