not-lain committed on
Commit
adc6d8b
1 Parent(s): 143c351

fix for image function

Browse files
Files changed (5) hide show
  1. .gitattributes +0 -1
  2. .gitignore +2 -0
  3. app.py +14 -12
  4. requirements.txt +1 -0
  5. test.py +59 -38
.gitattributes CHANGED
@@ -1,4 +1,3 @@
1
- .env
2
  *.7z filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
 
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ lain halloween.png
app.py CHANGED
@@ -19,6 +19,7 @@ import json
19
  import dotenv
20
  from transformers import AutoProcessor, SeamlessM4TModel
21
  import torchaudio
 
22
  dotenv.load_dotenv()
23
 
24
  client = Client("https://facebook-seamless-m4t.hf.space/--replicas/frq8b/")
@@ -76,19 +77,14 @@ def process_speech_using_model(sound):
76
  return text_out
77
 
78
 
79
- def convert_image_to_required_format(image):
80
- """
81
- convert image from numpy to base64
82
- """
83
- base64_image = base64.b64encode(image).decode('utf-8')
84
- return base64_image
85
-
86
-
87
- def process_image_with_openai(image):
88
- base64_image = convert_image_to_required_format(image)
89
  openai_api_key = os.getenv('OPENAI_API_KEY')
90
  # oai_org = os.getenv('OAI_ORG')
91
-
92
  headers = {
93
  "Content-Type": "application/json",
94
  "Authorization": f"Bearer {openai_api_key}"
@@ -118,7 +114,13 @@ def process_image_with_openai(image):
118
 
119
  response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
120
 
121
- return str(response.json())
 
 
 
 
 
 
122
 
123
 
124
  def query_vectara(text):
 
19
  import dotenv
20
  from transformers import AutoProcessor, SeamlessM4TModel
21
  import torchaudio
22
+ import PIL
23
  dotenv.load_dotenv()
24
 
25
  client = Client("https://facebook-seamless-m4t.hf.space/--replicas/frq8b/")
 
77
  return text_out
78
 
79
 
80
+ def process_image(image) :
81
+ img_name = f"{np.random.randint(0, 100)}.jpg"
82
+ PIL.Image.fromarray(image.astype('uint8'), 'RGB').save(img_name)
83
+ image = open(img_name, "rb").read()
84
+ base64_image = base64_image = base64.b64encode(image).decode('utf-8')
 
 
 
 
 
85
  openai_api_key = os.getenv('OPENAI_API_KEY')
86
  # oai_org = os.getenv('OAI_ORG')
87
+
88
  headers = {
89
  "Content-Type": "application/json",
90
  "Authorization": f"Bearer {openai_api_key}"
 
114
 
115
  response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
116
 
117
+ try :
118
+ out = response.json()
119
+ out = out["choices"][0]["message"]["content"]
120
+
121
+ return f"{out}"
122
+ except Exception as e :
123
+ return f"{e}"
124
 
125
 
126
  def query_vectara(text):
requirements.txt CHANGED
@@ -4,3 +4,4 @@ git+https://github.com/huggingface/transformers
4
  torchaudio==2.0.2
5
  sentencepiece
6
  python-dotenv
 
 
4
  torchaudio==2.0.2
5
  sentencepiece
6
  python-dotenv
7
+ Pillow
test.py CHANGED
@@ -1,40 +1,61 @@
1
- # import gradio library
 
 
 
2
  import gradio as gr
 
3
  import numpy as np
4
- import base64
5
- import time
6
- import gradio_client as grc
7
- # client = grc.InterfaceDescriptionClient("https://gradio.app")
8
- # define a function that takes input and returns output
9
- client = grc.Client("facebook/seamless_m4t")
10
-
11
- print(client.view_api(all_endpoints=True))
12
- job = client.submit(
13
- "S2TT",
14
- "file",
15
- None,
16
- "sample_input.mp3",
17
- "",
18
- "French",
19
- "English",
20
- api_name="/run",
21
- )
22
- while job.done() == False :
23
- time.sleep(1)
24
-
25
-
26
- gr.Markdown(job.result())
27
- # return out
28
-
29
- # # print(client.view_api(all_endpoints=True))
30
- # def convert_image_to_required_format(audio):
31
- # sr, data = audio
32
- # return str(type(sr))+str(type(data))
33
-
34
- # # create a gradio interface
35
- # iface = gr.Interface(
36
- # fn=sound_to_text,
37
- # inputs=[gr.Audio()],
38
- # outputs=[gr.Markdown(label="Output Text")],
39
- # )
40
- # iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dotenv
2
+ import base64
3
+ import os
4
+ import requests
5
  import gradio as gr
6
+ import PIL
7
  import numpy as np
8
+
9
+ dotenv.load_dotenv()
10
+
11
+
12
def process_image(image):
    """Describe an image using the OpenAI vision chat-completions endpoint.

    The image is JPEG-encoded in memory, embedded as a base64 data URL and
    sent together with the prompt "What's in this image?".

    Args:
        image: Pixel data as a NumPy array (what a Gradio ``"image"`` input
            passes to its callback), or ``None`` when nothing was uploaded.

    Returns:
        The model's reply text on success; otherwise a short string describing
        what went wrong. Request and response errors are returned as text
        rather than raised, so the result can always be shown in a text output.
    """
    # Local imports: a bare ``import PIL`` does not load the ``PIL.Image``
    # submodule, so ``PIL.Image`` only resolves when some other module has
    # already imported it.
    import io

    from PIL import Image

    if image is None:
        return "No image provided."

    # Encode in memory instead of writing a fixed "1.jpg" into the working
    # directory: no leftover file, no unclosed handle, and no clash between
    # concurrent requests. convert("RGB") also drops an alpha channel, which
    # JPEG cannot store.
    buffer = io.BytesIO()
    Image.fromarray(image.astype("uint8")).convert("RGB").save(buffer, format="JPEG")
    base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")

    openai_api_key = os.getenv("OPENAI_API_KEY")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What's in this image?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }

    # Without a timeout a stalled connection would block the UI indefinitely.
    request_timeout_s = 60

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=request_timeout_s,
        )
        body = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failure, timeout, or a body that is not valid JSON.
        return f"{e}"

    try:
        return f"{body['choices'][0]['message']['content']}"
    except (KeyError, IndexError, TypeError) as e:
        # NOTE(review): failed calls are expected to carry an "error" object
        # instead of "choices"; show it rather than the bare KeyError text.
        if isinstance(body, dict) and "error" in body:
            return f"{body['error']}"
        return f"{e}"
59
+
60
# Expose process_image through a minimal image-in / text-out Gradio UI
# and start serving it.
iface = gr.Interface(
    fn=process_image,
    inputs="image",
    outputs="text",
)
iface.launch()