anmolsahai committed on
Commit
c1c7a1b
1 Parent(s): 2fec317
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -4,23 +4,27 @@ import fitz # PyMuPDF
4
  from langchain_core.prompts import PromptTemplate
5
  from google.cloud import aiplatform
6
  from google.cloud.aiplatform_v1.services.model_service import ModelServiceClient
7
- from google.cloud.aiplatform_v1.types import GenerateContentRequest, GenerationConfig, Model
8
  import streamlit as st
9
 
 
 
 
 
10
  # Initialize the Google AI Platform
11
  aiplatform.init(project="akroda", location="us-central1")
12
 
13
- # Define the documents as dictionaries
14
  documents = [
15
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLjQKJeODgxNz5dL1Jvb3QgMTU0IDAgUi9TaXplIDE2Nj4+CnN0YXJ0eHJlZgoyMTY0NjkKJSVFT0YK")},
16
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLjQKJeLjz9MKNijU+PgpzdGFydHhyZWYKMTMxMDY0CiUlRU9GCg==")},
17
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLjQKJeLjz9MKNiAwZDU0YTVlNzllMWRhYWY1ZDQ2YjI+XS9Sb290IDE3NyAwIFIvU2l6ZSAxODc+PgpzdGFydHhyZWYKMjA3NTk5CiUlRU9GCg==")},
18
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLjQKJeLjz9ML1Jvb3QgMTg5IDAgUi9TaXplIDE5OT4+CnN0YXJ0eHJlZgoxOTgzNzMKJSVFT0YK")},
19
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xCcnCmVuZHN0cmVhbQplbmRvYmoKc3RhcnR4cmVmCjIwOTgyNQolJUVPRgo=")},
20
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLj+CnN0YXJ0eHJlZgoyMTk5MDYKJSVFT0YK")},
21
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLjQKJiUlRU9GCg==")},
22
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLjQKJe90IDMwOCAwIFIvU2l6ZSAzMTg+PgpzdGFydHhyZWYKMjcwNzU3CiUlRU9GCg==")},
23
- {"content_type": "application/pdf", "data": base64.b64decode("JVBERi0xLjUNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNjc1NzgvTyA5L0UgNjAyNDYvTiAxL1QgNjcyODcvSCBbIDQ4MyAxNTRdPj4NZW5kb2JxDSAgICAgICAgICAgICAgICAgICAgDQoyMiAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDE3NzU4MkJFODc4MzRFQjNBOEM3RkIzQTgyRjFFMEFCPjw5MzI2Qjk4REM4NjQ2RTRCODI3MzZFQUEzOENEQjFBQj5dL0luZGV4WzcgMjhdL0luZm8gNiAwIFIvTGVuZ3RoIDgzL1ByZXYgNjcyODgvUm9vdCA4IDAgUi9TaXplIDM1L1R5cGUvWFPRg0K")}
24
  ]
25
 
26
  text1 = """
@@ -71,14 +75,13 @@ generation_config = GenerationConfig(
71
  top_p=0.95,
72
  )
73
 
74
- # Assuming we need a safer approach to defining documents, wrapping them as dictionaries and not relying on an undefined Document class.
75
  def generate(document_parts, prompt_text):
76
  model_service_client = ModelServiceClient()
77
  model_resource_name = model_service_client.model_path("akroda", "us-central1", "gemini-1.5-pro-001")
78
  response = model_service_client.generate_content(
79
  request=GenerateContentRequest(
80
  model=model_resource_name,
81
- documents=[{"content_type": doc["content_type"], "data": doc["data"]} for doc in document_parts],
82
  prompt=prompt_text,
83
  generation_config=generation_config,
84
  )
 
4
  from langchain_core.prompts import PromptTemplate
5
  from google.cloud import aiplatform
6
  from google.cloud.aiplatform_v1.services.model_service import ModelServiceClient
7
+ from google.cloud.aiplatform_v1.types import GenerateContentRequest, GenerationConfig
8
  import streamlit as st
9
 
10
+ # Function to pad base64 strings
11
+ def pad_base64(base64_string):
12
+ return base64_string + '=' * (-len(base64_string) % 4)
13
+
14
  # Initialize the Google AI Platform
15
  aiplatform.init(project="akroda", location="us-central1")
16
 
17
+ # Define the documents as dictionaries, ensuring correct padding
18
  documents = [
19
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLjQKJeODgxNz5dL1Jvb3QgMTU0IDAgUi9TaXplIDE2Nj4+CnN0YXJ0eHJlZgoyMTY0NjkKJSVFT0YK"))},
20
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLjQKJeLjz9MKNijU+PgpzdGFydHhyZWYKMTMxMDY0CiUlRU9GCg=="))},
21
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLjQKJeLjz9MKNiAwZDU0YTVlNzllMWRhYWY1ZDQ2YjI+XS9Sb290IDE3NyAwIFIvU2l6ZSAxODc+PgpzdGFydHhyZWYKMjA3NTk5CiUlRU9GCg=="))},
22
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLjQKJeLjz9ML1Jvb3QgMTg5IDAgUi9TaXplIDE5OT4+CnN0YXJ0eHJlZgoxOTgzNzMKJSVFT0YK"))},
23
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xCcnCmVuZHN0cmVhbQplbmRvYmoKc3RhcnR4cmVmCjIwOTgyNQolJUVPRgo="))},
24
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLj+CnN0YXJ0eHJlZgoyMTk5MDYKJSVFT0YK"))},
25
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLjQKJiUlRU9GCg=="))},
26
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLjQKJe90IDMwOCAwIFIvU2l6ZSAzMTg+PgpzdGFydHhyZWYKMjcwNzU3CiUlRU9GCg=="))},
27
+ {"content_type": "application/pdf", "data": base64.b64decode(pad_base64("JVBERi0xLjUNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNjc1NzgvTyA5L0UgNjAyNDYvTiAxL1QgNjcyODcvSCBbIDQ4MyAxNTRdPj4NZW5kb2JxDSAgICAgICAgICAgICAgICAgICAgDQoyMiAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDE3NzU4MkJFODc4MzRFQjNBOEM3RkIzQTgyRjFFMEFCPjw5MzI2Qjk4REM4NjQ2RTRCODI3MzZFQUEzOENEQjFBQj5dL0luZGV4WzcgMjhdL0luZm8gNiAwIFIvTGVuZ3RoIDgzL1ByZXYgNjcyODgvUm9vdCA4IDAgUi9TaXplIDM1L1R5cGUvWFPRg0K"))}
28
  ]
29
 
30
  text1 = """
 
75
  top_p=0.95,
76
  )
77
 
 
78
  def generate(document_parts, prompt_text):
79
  model_service_client = ModelServiceClient()
80
  model_resource_name = model_service_client.model_path("akroda", "us-central1", "gemini-1.5-pro-001")
81
  response = model_service_client.generate_content(
82
  request=GenerateContentRequest(
83
  model=model_resource_name,
84
+ documents=document_parts,
85
  prompt=prompt_text,
86
  generation_config=generation_config,
87
  )