import os KEY = os.environ.get("openai_key") from typing import Dict import pandas as pd import gradio as gr from scrapeghost import SchemaScraper import openai # import json os.environ['OPENAI_API_KEY'] = KEY openai.api_key = KEY def desc_to_schema(description: str) -> Dict: # response = openai.ChatCompletion.create( # model="gpt-4", # messages=[{ # "role": # "user", # "content": # f"""Can you create a JSON schema for what this problem statement is telling. {description} in JSON format. {{"input": {{'[input field name]': '[type of data]'}},"output": {{'[output field name 1]: '[type of data]','[output field name 2]: '[type of data]',....}}. Output key must be a flat dict with just the name and its type and nothing else""" # }], # temperature=0.01, # max_tokens=256, # top_p=1, # frequency_penalty=0, # presence_penalty=0) # return json.loads(response['choices'][0]['message']['content'].strip()) return {"people": [{ "name": "string", "streak": "number", "profileURL": "string" }]} def schema_to_scrape(schema_var: Dict, url: str): scrape_links = SchemaScraper( schema={"output": schema_var}, models=['gpt-3.5-turbo-16k'] ) resp = scrape_links(url) return resp.data def scraped_to_df(scraped_raw) -> pd.DataFrame: return pd.DataFrame.from_records(scraped_raw['output']['people']) def e2e_flow(description: str, url: str) -> pd.DataFrame: schema = desc_to_schema(description) scraped_data = schema_to_scrape(schema, url) output_df = scraped_to_df(scraped_data) return output_df demo = gr.Interface( fn=e2e_flow, inputs=["text", "text"], outputs=[gr.Dataframe(label="Scraped data")], examples= [[ 'Get top 10 streakers and their names', 'https://www.producthunt.com/visit-streaks' ]], title="Data Input Automater", description="Explain your task and leave us the rest.") demo.launch()