Alesmikes GenAIDemo committed
Commit 52f6db2
0 Parent(s)

Duplicate from GenAIDemo/economic-forecast


Co-authored-by: Gen AI Demo <GenAIDemo@users.noreply.huggingface.co>

Files changed (6)
  1. .env +5 -0
  2. .gitattributes +34 -0
  3. README.md +13 -0
  4. app.py +139 -0
  5. gcp_access_key.json +12 -0
  6. requirements.txt +103 -0
.env ADDED
@@ -0,0 +1,5 @@
+ OPENAI_API_KEY = "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"
+ OPENAI_ORG = "org-hjgDjHii99kOiQDug5cQxuf4"
+ Pinecone_KEY="24b5f67e-eba8-4c64-8ee7-292ba39145fd"
+ Pinecone_ENV="us-west1-gcp"
+ Pinecone_INDEX="pinecone"
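
For reference, app.py below reads these values through python-dotenv; note that Pinecone_INDEX ("pinecone") is never read, since app.py hardcodes index_name = 'economic-forecast'. A minimal sketch of how the variables surface in Python, assuming only the names defined in this .env:

import os
from dotenv import load_dotenv  # python-dotenv, pinned in requirements.txt

load_dotenv()  # pulls the .env above into the process environment
openai_key = os.getenv("OPENAI_API_KEY")   # python-dotenv tolerates the spaces around '=' used above
openai_org = os.getenv("OPENAI_ORG")
pinecone_key = os.getenv("Pinecone_KEY")
pinecone_env = os.getenv("Pinecone_ENV")
pinecone_index = os.getenv("Pinecone_INDEX")  # defined here but unused by app.py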
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: QnA
+ emoji: 📈
+ colorFrom: indigo
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.24.1
+ app_file: app.py
+ pinned: false
+ duplicated_from: GenAIDemo/economic-forecast
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,139 @@
+ """
+ This app only supports English, since the text-to-speech model is English-only.
+ """
+ from google.cloud import texttospeech
+ import os
+ import openai
+ import gradio as gr
+ from dotenv import load_dotenv
+ import pinecone
+
+
+ """
+ Log in to GCP.
+ """
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "gcp_access_key.json"
+ # Instantiate a client
+ client = texttospeech.TextToSpeechClient()
+
+ """
+ Connect to the OpenAI API.
+ """
+ load_dotenv()
+ openai.organization = os.getenv("OPENAI_ORG")
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ EMBEDDING_MODEL = "text-embedding-ada-002"
+ """
+ Connect to the Pinecone API and assign the index.
+ """
+ index_name = 'economic-forecast'
+ pinecone.init(
+     api_key=os.getenv("Pinecone_KEY"),
+     environment=os.getenv("Pinecone_ENV")
+ )
+
+ ## initialize a first message to define GPT's role
+
+
+ """
+ Define the text -> speech function.
+ """
+ def text2speech(text):
+
+     # Set the text input to be synthesized
+     synthesis_input = texttospeech.SynthesisInput(text=text)
+
+     # Build the voice request: select the language code ("en-US") and the
+     # SSML voice gender (male)
+     voice = texttospeech.VoiceSelectionParams(
+         language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.MALE
+     )
+
+     # Select the type of audio file to be returned
+     audio_config = texttospeech.AudioConfig(
+         audio_encoding=texttospeech.AudioEncoding.MP3
+     )
+
+     # Perform the text-to-speech request on the text input with the selected
+     # voice parameters and audio file type
+     response = client.synthesize_speech(
+         input=synthesis_input, voice=voice, audio_config=audio_config
+     )
+     # The response's audio_content is binary.
+     with open("output.mp3", "wb") as out:
+         # Write the response to the output file.
+         out.write(response.audio_content)
+         print('Audio content written to file "output.mp3"')
+
+ """
+ Define the voice -> GPT -> text -> voice workflow.
+ """
+ def transcribe(audio):
+     #global messages
+
+     """
+     Gradio's output file has no .wav extension, so rename the file to the correct format.
+     """
+     extension = ".wav"
+     audio_formatted = f"{audio}{extension}"
+     os.rename(audio, audio_formatted)
+
+     """
+     Pass the audio file to Whisper to transcribe.
+     """
+     audio_file = open(audio_formatted, "rb")
+     transcript = openai.Audio.transcribe("whisper-1", audio_file)
+
+
+     """
+     Run cosine similarity to find context.
+     """
+     ### Input the question and search for the relevant text
+     index = pinecone.Index(index_name)
+     query = openai.Embedding.create(input=transcript["text"], model=EMBEDDING_MODEL)["data"][0]["embedding"]  # embed the user query into an embedding vector
+     res = index.query(query, top_k=3, include_metadata=True)  # cosine-similarity search for the most relevant embedded content; this runs entirely in Pinecone
+     contexts = [
+         x['metadata']['text'] for x in res['matches']
+     ]
+     merged_context = "".join(contexts)
+     contextwithQuestion = "Context: " + "\n" + merged_context + "*End of the context*" + "\n\n" + "Question: " + transcript["text"]
+
+
+     """
+     Pass the transcribed text to GPT.
+     """
+     messages = [
+         {"role": "system",
+          "content":
+              "You are an assistant that answers questions only based on the context provided. Before each question, some context will be provided.\
+              Context starts with 'Context:' and ends with '*End of the context*'. Once you receive all the context, you will consider all of it to answer the questions.\
+              It is very important to answer the question as honestly as possible.\
+              If you are not sure about the answer based on the context provided, you can still try to come up with an answer, but you must also tell the user that you are not confident about it and that they should look for a secondary source to confirm the answer.\
+              It is very important to answer the questions politely. It is very important to answer the question in great detail.\
+              Once you receive all the context, you will receive a question that starts with 'Question:'. Once you receive the question, you can answer it.\
+              "}
+     ]
+     messages.append({"role": "user", "content": contextwithQuestion})  ## add the user input to the list of messages
+
+     response = openai.ChatCompletion.create(
+         model="gpt-3.5-turbo",
+         messages=messages
+     )  ## pass the list of messages to GPT
+
+     messages.append({"role": "assistant", "content": response["choices"][0]["message"]["content"]})  ## add the GPT response to the list of messages
+     text2speech(response["choices"][0]["message"]["content"])  ## create the MP3 voice output
+
+     voice_path = os.path.abspath("output.mp3")
+
+     return voice_path
+
+
+ output_audio = gr.outputs.Audio(type="filepath", label="AI Assistant")
+
+ gr.Interface(
+     fn=transcribe,
+     inputs=gr.Audio(source="microphone", type="filepath", label="Speak here..."),
+     outputs=output_audio,
+     live=True,
+     allow_flagging='never'
+ ).launch()  ## add share=True to publish on the public site
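
The retrieval step inside transcribe() can be exercised on its own. Below is a minimal sketch against the same openai 0.27.x and pinecone-client 2.2.x APIs pinned in requirements.txt; the sample question is a hypothetical input:

import os
import openai
import pinecone
from dotenv import load_dotenv

load_dotenv()
openai.organization = os.getenv("OPENAI_ORG")
openai.api_key = os.getenv("OPENAI_API_KEY")
pinecone.init(api_key=os.getenv("Pinecone_KEY"), environment=os.getenv("Pinecone_ENV"))

index = pinecone.Index("economic-forecast")
question = "What is the GDP growth outlook for next year?"  # hypothetical input

# Embed the question, then ask Pinecone for the 3 nearest stored chunks.
vector = openai.Embedding.create(input=question, model="text-embedding-ada-002")["data"][0]["embedding"]
res = index.query(vector, top_k=3, include_metadata=True)
for match in res["matches"]:
    print(match["score"], match["metadata"]["text"][:80])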
gcp_access_key.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "type": "service_account",
+   "project_id": "thinking-return-379505",
+   "private_key_id": "07f8fa166e7d01d416e21b8dab3c83174ed7b16f",
+   "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQDp+fRINIskg1LH\ntOCoK6FHadXJ8zwS/7ktFY8xkYKlwjZl42VzRm3ypXTwH97A5FD1H6XOcRqATGFi\nuoIROC+0QUGSzqfS9epju0joJymDfBlTqeq+GbcgI9QGFV/TfBUUpJCbsYnOqjXu\npDSsNM8HFqqHMhhCn9RUeAuHMeoWhlvLiCr9VTTli9nEbtBR+f3JNkMT+Brzy9ee\nY7b+LQ30Ac84t1f+rv+yiTGnsOln9OAYc80sLt41lG1cloqCvhwy/K+HsVOMwLdC\nic6FoyotaT5oGxTTZgrM+fFPkS1H0av+b396ERCU2JoqgKVp3LAQFyaULmKdem/O\nRx33oOWRAgMBAAECgf9TSMXLbeiZdqVgsOHG+sDNyr5uxEX9/UjM43+1BTacCiWS\n3v6dqdQ5KuwVDUMdJigrGLjMYzYrtQR1QC5MMtPINfJBb44NabulBGVPPaFpTCM3\nxKPqRv0IU7iE8dbKUMCwOGPBB9El3zCTYZ/eLG4E9+x7czlvfbT23EQ+O2Y33a1N\nKXv1kPQz+xDMbqELwWbGlEi393PNdoBBKP/xiq60WxxcFB/LK4L/z7xMvEQETOEw\nOGABUOFuqYQQ3Lx+3pYP5vzwYH9qAX7lvyO2ARjgDPmgmcgmgFq4vfkSitXATDMa\neH3J5Qxg0zBZ05ZyqSmJL68D9dSUHwxLVw/kIYECgYEA+MrVnzgHckQIPaS1IIgy\ntJj2esBfUwRWLx+4frHcmTYknRcuVsjusHrOUcpOnEVXmQUlvq5hNdY8UUlK7rqb\nAk0hsyFhVhe+sZKyfGoqmxaI8ulCFEF9WbjtTBYunrj7Y9nIQCtkZAOeBNgEPYzH\nKGGjGwq2kD4jF3Ff6NoLc9ECgYEA8ME8zibJQ3RltVNaX27Ms4ESUHHLkicuknZ9\njmTHf6NrPA9x34F6n4KshPOWytivIHqeS6ndbhV6q2msYj0hHZoJMuzWUDeLOvwQ\nzuPvFvpeFOpxT+4sGicXr+3XZ3TUUCWUXojv2oFuXgvbvoh1FsF9nLRB0vcuQRJr\nTFhjhcECgYAHo1KDnNi9rVEDc3cMFKJZ4QhQJG/IwK1UrQig+xzmpz7gg8GscGbD\njEGhSTC4o3hY/kt2Wywo1vbrF/Dw+1Ge9HdAZJn0Zq44DWTlZO0B7eMucfLRZGFQ\ny9idKWEtBkjRe/Il5i8i0umbBHa83QGQP88eGWoZa47wPU7Qe0E2wQKBgQCwCKrc\nFVIN51rCxWqL4uj7y0FH3py5hbqwzsCpUBjqw+ORq71YbkTpwtM/9xfnw53xQpbh\nvhcw+bWTBhZMUmQXT9ywBqoBR0ufWlUz4UedZn1KD83AYuaOTM0uDtr0Q2FGJOQi\nZz/xZbf8PfFzlDUA6YNeBBMpHSEN5VjdHrfOQQKBgAWA7RUVCcrT51YoEDB1L0Q9\nGgcO5GZC9wOI2d/YYmmLXHQr+bmPsKTz0ktfgXibdFeJF2/cMcK8Jc67NJQ/8mJ9\noY/O6T4P4JpeE7n+rClCbTFusqlPXcdEr+upEfj4hqcHnnzoCIurX84/EcyjS3IU\nPF3puKbNR2+5NwSM394k\n-----END PRIVATE KEY-----\n",
+   "client_email": "texttospeechserviceaccount@thinking-return-379505.iam.gserviceaccount.com",
+   "client_id": "106508783634225510308",
+   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+   "token_uri": "https://oauth2.googleapis.com/token",
+   "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+   "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/texttospeechserviceaccount%40thinking-return-379505.iam.gserviceaccount.com"
+ }
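
app.py points GOOGLE_APPLICATION_CREDENTIALS at this file before instantiating the client. As a sketch of an equivalent alternative with the same google-cloud-texttospeech version pinned in requirements.txt, the client can also be built from the key file directly:

from google.cloud import texttospeech

# Same credentials, loaded explicitly instead of via the environment variable.
client = texttospeech.TextToSpeechClient.from_service_account_file("gcp_access_key.json")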
requirements.txt ADDED
@@ -0,0 +1,103 @@
+ aiofiles==23.1.0
+ aiohttp==3.8.4
+ aiosignal==1.3.1
+ altair==4.2.2
+ anyio==3.6.2
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrs==22.2.0
+ audioread==3.0.0
+ cachetools==5.3.0
+ certifi==2022.12.7
+ cffi==1.15.1
+ charset-normalizer==3.1.0
+ click==8.1.3
+ contourpy==1.0.7
+ cycler==0.11.0
+ datasets==2.10.1
+ decorator==5.1.1
+ dill==0.3.6
+ dnspython==2.3.0
+ entrypoints==0.4
+ fastapi==0.94.1
+ ffmpy==0.3.0
+ filelock==3.10.0
+ fonttools==4.39.2
+ frozenlist==1.3.3
+ fsspec==2023.3.0
+ google-api-core==2.11.0
+ google-auth==2.16.2
+ google-cloud-texttospeech==2.14.1
+ googleapis-common-protos==1.58.0
+ gradio==3.21.0
+ grpcio==1.51.3
+ grpcio-status==1.51.3
+ h11==0.14.0
+ httpcore==0.16.3
+ httpx==0.23.3
+ huggingface-hub==0.13.2
+ idna==3.4
+ Jinja2==3.1.2
+ joblib==1.2.0
+ jsonschema==4.17.3
+ kiwisolver==1.4.4
+ lazy_loader==0.1
+ librosa==0.10.0.post2
+ linkify-it-py==2.0.0
+ llvmlite==0.39.1
+ loguru==0.6.0
+ markdown-it-py==2.2.0
+ MarkupSafe==2.1.2
+ matplotlib==3.7.1
+ mdit-py-plugins==0.3.3
+ mdurl==0.1.2
+ msgpack==1.0.5
+ multidict==6.0.4
+ multiprocess==0.70.14
+ numba==0.56.4
+ numpy==1.23.5
+ openai==0.27.2
+ orjson==3.8.7
+ packaging==23.0
+ pandas==1.5.3
+ Pillow==9.4.0
+ pinecone-client==2.2.1
+ pooch==1.6.0
+ proto-plus==1.22.2
+ protobuf==4.22.1
+ pyarrow==11.0.0
+ pyasn1==0.4.8
+ pyasn1-modules==0.2.8
+ pycparser==2.21
+ pydantic==1.10.6
+ pydub==0.25.1
+ pyparsing==3.0.9
+ pyrsistent==0.19.3
+ python-dateutil==2.8.2
+ python-dotenv==1.0.0
+ python-multipart==0.0.6
+ pytz==2022.7.1
+ PyYAML==6.0
+ regex==2022.10.31
+ requests==2.28.2
+ responses==0.18.0
+ rfc3986==1.5.0
+ rsa==4.9
+ scikit-learn==1.2.2
+ scipy==1.10.1
+ six==1.16.0
+ sniffio==1.3.0
+ soundfile==0.12.1
+ soxr==0.3.4
+ starlette==0.26.1
+ threadpoolctl==3.1.0
+ tiktoken==0.3.2
+ toolz==0.12.0
+ tqdm==4.65.0
+ typing_extensions==4.5.0
+ uc-micro-py==1.0.1
+ urllib3==1.26.15
+ uvicorn==0.21.1
+ websockets==10.4
+ xxhash==3.2.0
+ yarl==1.8.2