"""Extract gene / SNP / disease information from an image of a table.

The script makes two independent passes over the same image:

1. A vision chat model is asked to read the table and list the genes,
   SNPs and diseases it finds; the reply is printed.
2. img2table (using the DocTR OCR engine) extracts the tables into a pandas
   DataFrame, which is printed and then handed to a LangChain pandas agent
   together with a question about its content.
"""
import base64
import mimetypes

import pandas as pd
from dotenv import load_dotenv
from img2table.document import Image
from img2table.ocr import DocTR
from langchain.schema.messages import (  # noqa: F401 - AIMessage kept for compatibility
    AIMessage,
    HumanMessage,
    SystemMessage,
)
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI

IMAGE_PATH = '../NutriGenMe-Testing/ukmss-1.png'

# Used when the MIME type cannot be guessed from the file name; this is the
# value the data URL previously hard-coded for every image.
DEFAULT_IMAGE_MIME = "image/jpeg"

SYSTEM_PROMPT = (
    "You are an experienced doctor specializing in genomics and want to "
    "identify names of genes, SNPs, and their related diseases based on the "
    "tables given."
)

EXTRACTION_PROMPT = (
    'You will be provided with the image of a table. Extract all genes / '
    'locus names with its respective rsID / SNP and potential diseases in '
    'curly brackets like this: {"Genes" : "", "SNPs" : "", "Diseases" : ""}.'
)

AGENT_QUESTION = "Is this table contain Gene names?"


def encode_image(image_path):
    """Return the content of the file at *image_path* as a base64 string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def _image_data_url(image_path):
    """Build a ``data:`` URL for the image, with a MIME type matching the file.

    The MIME type is guessed from the file name so that, for example, a
    ``.png`` file is not announced as JPEG. Unknown or non-image extensions
    fall back to ``DEFAULT_IMAGE_MIME``.
    """
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = DEFAULT_IMAGE_MIME
    return f"data:{mime_type};base64,{encode_image(image_path)}"


def describe_table_image(image_path):
    """Ask the vision model to list genes, SNPs and diseases in the image.

    Args:
        image_path: Path of the table image to send to the model.

    Returns:
        The ``content`` of the model's reply message.
    """
    vision = ChatOpenAI(model="gpt-4-vision-preview", max_tokens=4096)
    reply = vision.invoke(
        [
            # The persona is an instruction to the model, so it is sent as a
            # system message rather than as a fabricated assistant turn.
            SystemMessage(content=SYSTEM_PROMPT),
            HumanMessage(
                content=[
                    {"type": "text", "text": EXTRACTION_PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": _image_data_url(image_path),
                            "detail": "low",
                        },
                    },
                ]
            ),
        ]
    )
    return reply.content


def extract_table_frame(image_path):
    """Extract every table found in the image into one DataFrame.

    Args:
        image_path: Path of the image to run table extraction on.

    Returns:
        A single DataFrame holding the rows of all extracted tables, in
        extraction order, with a fresh 0..n-1 index.

    Raises:
        ValueError: If no table was detected in the image.
    """
    image = Image(image_path)
    ocr = DocTR()
    extracted_tables = image.extract_tables(
        ocr=ocr,
        implicit_rows=True,
        borderless_tables=True,
        min_confidence=0,
    )
    if not extracted_tables:
        raise ValueError(f"No tables could be extracted from {image_path!r}")

    # One concat over all frames instead of re-copying the accumulated frame
    # once per table.
    return pd.concat([table.df for table in extracted_tables], ignore_index=True)


def main(image_path=IMAGE_PATH):
    """Run both passes on *image_path* and print their results."""
    # Load settings from a .env file before any client is created
    # (presumably this is where the OpenAI API key comes from - confirm).
    load_dotenv()

    print(describe_table_image(image_path))

    df = extract_table_frame(image_path)
    print(df)

    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    # NOTE(review): the pandas agent answers by running model-generated
    # Python against `df`. Newer langchain_experimental releases appear to
    # require an explicit `allow_dangerous_code=True` opt-in here - confirm
    # against the installed version before upgrading.
    agent = create_pandas_dataframe_agent(llm, df, verbose=True)
    agent_output = agent.invoke(AGENT_QUESTION)
    print(agent_output)


if __name__ == "__main__":
    main()