# NOTE(review): removed non-Python residue copied from a web view of this
# file (a "File size" header, git-blame short hashes, and the line-number
# gutter 1-166). None of it was source code; the script proper starts at
# the imports below.
import streamlit as st
import os
from io import StringIO
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.embeddings import HuggingFaceInferenceAPIEmbedding
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.schema import Document
import uuid
from llama_index.vector_stores.types import MetadataFilters, ExactMatchFilter
from typing import List
from pydantic import BaseModel

# Hugging Face Inference API token, read from Streamlit secrets
# (.streamlit/secrets.toml or the deployment's secret store).
# NOTE(review): the key "INFRERENCE_API_TOKEN" looks misspelled
# ("INFERENCE") — but it must match the name used in the secrets store,
# so confirm both sides before renaming either.
inference_api_key = st.secrets["INFRERENCE_API_TOKEN"]

# Model names were once user-configurable via these text inputs; they are
# now hard-coded further down (embed_model_name / llm_model_name).
# embed_model_name = st.text_input(
#     'Embed Model name', "Gooly/gte-small-en-fine-tuned-e-commerce")

# llm_model_name = st.text_input(
#     'Embed Model name', "mistralai/Mistral-7B-Instruct-v0.2")


class PriceModel(BaseModel):
    """Structured output model for the extracted product price.

    Passed as ``output_cls`` to the query engine below so the LLM answer
    is parsed into an object with a single ``price`` field.
    """
    price: str  # price kept as free-form text (e.g. "$19.99"), not parsed to a number


# Hard-coded model choices (the commented-out text_input widgets above
# used to make these configurable).
embed_model_name = "jinaai/jina-embedding-s-en-v1"
llm_model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Remote LLM served through the Hugging Face Inference API.
llm = HuggingFaceInferenceAPI(
    model_name=llm_model_name, token=inference_api_key)

# Remote embedding model via the same Inference API.
# NOTE(review): model_kwargs/encode_kwargs mirror the *local*
# HuggingFaceEmbedding options; whether the Inference API wrapper honours
# them (especially the empty "device" string) is unverified — check the
# llama_index docs for the version in use.
embed_model = HuggingFaceInferenceAPIEmbedding(
    model_name=embed_model_name,
    token=inference_api_key,
    model_kwargs={"device": ""},
    encode_kwargs={"normalize_embeddings": True},
)

# Bundle the LLM and embedder so the index and query engine share them.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model, llm=llm)

# Free-text question to run against the uploaded page; defaults to a
# price lookup to match the PriceModel output class used below.
query = st.text_input(
    'Query', "What is the price of the product?"
)

# Single-file uploader restricted to .html; stays None until a file is chosen.
html_file = st.file_uploader("Upload a html file", type=["html"])

if html_file is not None:
    # Decode the uploaded bytes to text and show the raw HTML for inspection.
    buffer = StringIO(html_file.getvalue().decode("utf-8"))
    html_text = buffer.read()
    with st.expander("Uploaded HTML"):
        st.write(html_text)

    # Tag the document with a fresh UUID so retrieval can be scoped to
    # exactly this upload through a metadata filter.
    doc_id = str(uuid.uuid4())
    doc = Document(text=html_text)
    doc.metadata["id"] = doc_id

    doc_filters = MetadataFilters(
        filters=[ExactMatchFilter(key="id", value=doc_id)])

    # Build a vector index over the single uploaded document.
    vector_index = VectorStoreIndex.from_documents(
        [doc],
        show_progress=True,
        metadata={"source": "HTML"},
        service_context=service_context,
    )

    # Structured query: tree-summarize the retrieved chunks and parse the
    # answer into a PriceModel instance.
    engine = vector_index.as_query_engine(
        filters=doc_filters,
        service_context=service_context,
        response_mode="tree_summarize",
        output_cls=PriceModel,
    )

    result = engine.query(query)

    st.write(result.response)

    # NOTE(review): this assumes the pydantic field is exposed directly on
    # the response object; depending on the llama_index version the parsed
    # model may live in result.response instead — confirm.
    st.write(f'Price: {result.price}')

# if st.button('Start Pipeline'):
#     if html_file is not None and embed_model_name is not None and llm_model_name is not None and query is not None:
#         st.write('Running Pipeline')
#         llm = HuggingFaceInferenceAPI(
#             model_name=llm_model_name, token=inference_api_key)

#         embed_model = HuggingFaceInferenceAPIEmbedding(
#             model_name=embed_model_name,
#             token=inference_api_key,
#             model_kwargs={"device": ""},
#             encode_kwargs={"normalize_embeddings": True},
#         )

#         service_context = ServiceContext.from_defaults(
#             embed_model=embed_model, llm=llm)

#         stringio = StringIO(html_file.getvalue().decode("utf-8"))
#         string_data = stringio.read()
#         with st.expander("Uploaded HTML"):
#             st.write(string_data)

#         document_id = str(uuid.uuid4())

#         document = Document(text=string_data)
#         document.metadata["id"] = document_id
#         documents = [document]

#         filters = MetadataFilters(
#             filters=[ExactMatchFilter(key="id", value=document_id)])

#         index = VectorStoreIndex.from_documents(
#             documents, show_progress=True, metadata={"source": "HTML"}, service_context=service_context)

#         retriever = index.as_retriever()

#         ranked_nodes = retriever.retrieve(
#             query)

#         with st.expander("Ranked Nodes"):
#             for node in ranked_nodes:
#                 st.write(node.node.get_content(), "-> Score:", node.score)

#         query_engine = index.as_query_engine(
#             filters=filters, service_context=service_context)

#         response = query_engine.query(query)

#         st.write(response.response)

#         st.write(response.source_nodes)

#     else:
#         st.error('Please fill in all the fields')
# else:
#     st.write('Press start to begin')

# # if html_file is not None:
# #     stringio = StringIO(html_file.getvalue().decode("utf-8"))
# #     string_data = stringio.read()
# #     with st.expander("Uploaded HTML"):
# #         st.write(string_data)

# #     document_id = str(uuid.uuid4())

# #     document = Document(text=string_data)
# #     document.metadata["id"] = document_id
# #     documents = [document]

# #     filters = MetadataFilters(
# #         filters=[ExactMatchFilter(key="id", value=document_id)])

# #     index = VectorStoreIndex.from_documents(
# #         documents, show_progress=True, metadata={"source": "HTML"}, service_context=service_context)

# #     retriever = index.as_retriever()

# #     ranked_nodes = retriever.retrieve(
# #         "Get me all the information about the product")

# #     with st.expander("Ranked Nodes"):
# #         for node in ranked_nodes:
# #             st.write(node.node.get_content(), "-> Score:", node.score)

# #     query_engine = index.as_query_engine(
# #         filters=filters, service_context=service_context)

# #     response = query_engine.query(
# #         "Get me all the information about the product")

# #     st.write(response)