Spaces:

traversaal-ai-hackathon
/

maksimov_dudnik

Runtime error

App Files Files Community

Maksimov-Dmitry committed on Mar 17

Commit

d1a829e

•

1 Parent(s): eb025bc

app

Browse files

Files changed (17) hide show

.gitattributes +1 -0
app.py +250 -0
data/db/.lock +1 -0
data/db/collection/hotels/storage.sqlite +3 -0
data/db/meta.json +1 -0
requirements.txt +6 -0
src/__pycache__/prompts.cpython-310.pyc +0 -0
src/__pycache__/prompts.cpython-39.pyc +0 -0
src/__pycache__/rag.cpython-310.pyc +0 -0
src/__pycache__/rag.cpython-39.pyc +0 -0
src/__pycache__/retriever.cpython-310.pyc +0 -0
src/__pycache__/streamlit_utils.cpython-310.pyc +0 -0
src/__pycache__/streamlit_utils.cpython-39.pyc +0 -0
src/create_vector_db.py +279 -0
src/prompts.py +62 -0
src/retriever.py +92 -0
src/streamlit_utils.py +80 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.sqlite filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,250 @@

+from src import streamlit_utils
+from src.prompts import AGENT_SYSTEM_PROMPT, AGENT_USER_PROMPT, RAG_USER_PROMPT, TRAVERSIALAI_USER_PROMPT
+from src.retriever import Retriever
+import streamlit as st
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain.memory import ChatMessageHistory
+import re
+import requests
+import os
+from qdrant_client import QdrantClient
+# Name of the Qdrant collection that holds the hotel documents; used by get_parameters and setup_chain.
+collection_name = 'hotels'
+# Page configuration followed by the static header content of the app.
+st.set_page_config(page_title="Hotels search chatbot", page_icon="⭐")
+st.header('Hotels search chatbot')
+st.write('[![view source code and description](https://img.shields.io/badge/view_source_code-gray?logo=github)](https://github.com/Maksimov-Dmitry/traversaal-ai-hackathon)')
+st.write('Developed by [Dmitry Maksimov](https://www.linkedin.com/in/maksimov-dmitry/), maksimov.dmitry.m@gmail.com and [Ilya Dudnik](https://www.linkedin.com/in/ilia-dudnik-5b8018271/), ilia.dudnik@fau.de')
+# Sidebar control; the chosen value is passed as `top_k` to the retriever in HotelsSearchChatbot._make_action.
+st.sidebar.header('Choose your preferences')
+n_hotels = st.sidebar.number_input('Number of hotels', min_value=1, max_value=10, value=3)
+@st.cache_resource
+def get_db_client(path='data/db'):
+    """Open the local Qdrant database stored under `path`.
+    The function is decorated with `st.cache_resource`.
+    Args:
+        path (str): directory of the on-disk Qdrant storage
+    Returns:
+        QdrantClient: client bound to that storage directory
+    """
+    return QdrantClient(path=path)
+def add_new_info(chat_history, queries):
+    """Record changed sidebar preferences in the conversation.
+    Every query is appended as a user message, immediately followed by a fixed
+    acknowledgement from the assistant, so the Agent sees the new preferences.
+    Args:
+        chat_history: history of the chat
+        queries (list): list of queries that the user has changed
+    """
+    acknowledgement = 'Ok, got it!'
+    for preference in queries:
+        chat_history.add_user_message(preference)
+        chat_history.add_ai_message(acknowledgement)
+def check_params(params):
+    """Compare the current sidebar parameters with those of the previous call.
+    For every parameter (city, price, rating) whose value differs, a sentence describing
+    the new preference is produced. The current parameters are then stored in the
+    session state for the next comparison.
+    Args:
+        params (dict): dictionary with the parameters
+    Returns:
+        list: list of queries that the user has changed
+    """
+    if 'prev_params' not in st.session_state:
+        # Placeholder values differ from any real selection, so the first call reports everything.
+        st.session_state.prev_params = {'city': '<BLANK>', 'price': '<BLANK>', 'rating': '<BLANK>'}
+    previous = st.session_state.prev_params
+    notifications = []
+    if previous['city'] != params['city']:
+        if params['city']:
+            notifications.append(f'I want to find hotels in {params["city"]}')
+        else:
+            notifications.append('I want to find hotels in any city')
+    if previous['price'] != params['price']:
+        if params['price']:
+            notifications.append(f'I want to find hotels in price range {params["price"]}')
+        else:
+            notifications.append('I want to find hotels in any price range')
+    if previous['rating'] != params['rating']:
+        notifications.append(f'I want to find hotels with rating greater than {params["rating"]}')
+    st.session_state.prev_params = params
+    return notifications
+def get_parameters(db_client):
+    """Render the sidebar filters (city, price, minimum rating) and return the selection.
+    The selectable cities and prices are the distinct payload values found in the Qdrant
+    collection. The returned metadata is used in the MixedRetrieval from Qdrant vector DB.
+    Args:
+        db_client: Qdrant client used to read the payloads of the collection
+    Returns:
+        dict: keys 'city' and 'price' (None when no restriction was chosen) and 'rating' (float)
+    """
+    any_option = 'Doest not matter'
+    points, _ = db_client.scroll(
+        collection_name=collection_name,
+        limit=int(1e9),  # `limit` is an integer parameter; large enough to read every point
+        with_payload=True,
+        with_vectors=False,
+    )
+    # Sort the distinct values so the option order does not depend on set iteration order.
+    cities = [any_option] + sorted({point.payload['city'] for point in points}, key=str)
+    city = st.sidebar.selectbox('City', cities, index=0)
+    if city == any_option:
+        city = None
+    prices = [any_option] + sorted({point.payload['price'] for point in points}, key=str)
+    price = st.sidebar.selectbox('Price', prices, index=0)
+    if price == any_option:
+        price = None
+    rating = st.sidebar.slider('Min hotel rating', min_value=.0, max_value=5.0, value=4.5, step=.5)
+    return dict(city=city, price=price, rating=rating)
+class HotelsSearchChatbot:
+    """
+        This is the Agent class. It is responsible for the decision-making during conversation with the user.
+        Based on the user's query, the Agent decides which action to take and how to present result to the user.
+    """
+    # Upper bound, in seconds, for a single request to the Traversaal Ares API.
+    ARES_TIMEOUT = 60
+    def __init__(self, db_client):
+        """Collect the API keys and store the model settings.
+        Args:
+            db_client: Qdrant client handed to the Retriever
+        """
+        # Renders the key inputs in the sidebar; it stops the script run while a key is missing.
+        streamlit_utils.configure_api_keys()
+        self.llm_model = "gpt-4-1106-preview"
+        self.temperature = 0.6
+        self.embeedings_model = "text-embedding-3-large"
+        self.rerank_model = 'rerank-multilingual-v2.0'
+        self.ares_api_key = os.environ.get("ARES_API_KEY")
+        self.db_client = db_client
+    def _traversialai(self, query):
+        """Acquiring information from the internet using the Traversaal.ai.
+        Args:
+            query (str): search query
+        Returns:
+            str: information from the internet based on the query, or None when the
+                request fails or the response does not have the expected shape
+        """
+        url = "https://api-ares.traversaal.ai/live/predict"
+        payload = {"query": [query]}
+        headers = {
+            "x-api-key": self.ares_api_key,
+            "content-type": "application/json"
+        }
+        try:
+            response = requests.post(url, json=payload, headers=headers, timeout=self.ARES_TIMEOUT)
+            return response.json()['data']['response_text']
+        except (requests.RequestException, ValueError, KeyError, TypeError):
+            # Network failure, non-JSON body or unexpected payload: there is no context to return.
+            return None
+    def _get_action(self, text):
+        """Parse (read) the action and the action input from the response of the Agent
+        (after he made a decision what to do).
+        'action' and 'action_input' indicate whether we need to query additional tools
+        (vector DB, Traversaal AI) and how.
+        Args:
+            text (str): response of the Agent, which contains the action and the action input
+        Returns:
+            tuple: action, action input (each is None when it is not present in the text)
+        """
+        # The action is the rest of the line following "Action:".
+        action_match = re.search(r"Action:\s*(.*)", text)
+        # The action input is everything after "Action Input:"; DOTALL keeps multi-line answers intact.
+        action_input_match = re.search(r"Action Input:\s*(.*)", text, re.DOTALL)
+        action = action_match.group(1).strip() if action_match else None
+        action_input = action_input_match.group(1).strip() if action_input_match else None
+        return action, action_input
+    def _answer_with_context(self, prompt, chain, chat_history, config):
+        """Invoke the chain with `prompt` appended temporarily to the chat history.
+        The temporary message is removed again even when the invocation raises.
+        """
+        chat_history.add_user_message(prompt)
+        try:
+            response = chain.invoke({"messages": chat_history.messages}, config)
+        finally:
+            chat_history.messages.pop()
+        return response.content
+    def _make_action(self, action, action_input, retriever, chain, chat_history, config, retriever_params):
+        """Take the action corresponding to 'action' and 'action input'. The 'action' can be one of the following:
+            'nothing' - Agent is capable of dealing on its own without use of additional tools,
+            'hotels_data_base' - Agent decides to get the information from the hotels vector DB,
+            'ares_api' - Agent requires additional information from the internet using the Traversaal.ai.
+        Args:
+            action (str): action to make
+            action_input (str): action input (formulated by Agent search query)
+            retriever (Retriever): Retriever object
+            chain (Chain): Chain object
+            chat_history (ChatMessageHistory): history of the chat
+            config (dict): handlers for a LangChain invoke method
+            retriever_params (dict): parameters for the Retriever
+        Returns:
+            str: the answer for the user, or None when the action is unknown or has no input
+        """
+        if not action_input:
+            # Nothing to show or to search for; the caller displays its fallback message.
+            return None
+        if action == 'nothing':
+            st.markdown(action_input)
+            return action_input
+        if action == 'hotels_data_base':
+            context = retriever(action_input, top_k=n_hotels, **retriever_params)
+            prompt = RAG_USER_PROMPT.format(context=context, query=action_input)
+            return self._answer_with_context(prompt, chain, chat_history, config)
+        if action == 'ares_api':
+            context = self._traversialai(action_input)
+            prompt = TRAVERSIALAI_USER_PROMPT.format(context=context, query=action_input)
+            return self._answer_with_context(prompt, chain, chat_history, config)
+        return None
+    # NOTE(review): st.cache_resource shares the returned objects between sessions, so this
+    # chat history is presumably shared by all users of the app - confirm that this is intended.
+    @st.cache_resource
+    def setup_chain(_self):
+        """Build the retriever, an empty chat history and the prompt | chat-model chain.
+        Returns:
+            tuple: chain, chat_history, retriever
+        """
+        retriever = Retriever(embedding_model=_self.embeedings_model, llm_model=_self.llm_model,
+                              rerank_model=_self.rerank_model, db_client=_self.db_client, db_collection=collection_name)
+        chat_history = ChatMessageHistory()
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    AGENT_SYSTEM_PROMPT,
+                ),
+                MessagesPlaceholder(variable_name="messages"),
+            ]
+        )
+        chat = ChatOpenAI(model=_self.llm_model, temperature=_self.temperature, streaming=True)
+        chain = prompt | chat
+        return chain, chat_history, retriever
+    @streamlit_utils.enable_chat_history
+    def main(self, params):
+        """Handle one user query: decide on an action, execute it and display the answer.
+        Args:
+            params (dict): sidebar parameters (city, price, rating), also passed to the Retriever
+        """
+        chain, chat_history, retriever = self.setup_chain()
+        user_query = st.chat_input(placeholder="Ask me anything!")
+        if user_query:
+            streamlit_utils.display_msg(user_query, 'user')
+            # add new info to the chat history
+            queries = check_params(params)
+            add_new_info(chat_history, queries)
+            # get the action and the action input based on the user's query
+            chat_history.add_user_message(AGENT_USER_PROMPT.format(input=user_query))
+            try:
+                action_response = chain.invoke({"messages": chat_history.messages})
+            finally:
+                # the agent prompt is only a temporary message
+                chat_history.messages.pop()
+            action, action_input = self._get_action(action_response.content)
+            with st.chat_message("assistant"):
+                st_cb = streamlit_utils.StreamHandler(st.empty())
+                # create response on the user's query
+                response = self._make_action(action, action_input,
+                                             retriever, chain, chat_history, {"callbacks": [st_cb]}, params)
+                chat_history.add_user_message(user_query)
+                if response is None:
+                    response = 'Sorry, I cannot help you with it. Could you rephrase your question?'
+                    st.markdown(response)
+                chat_history.add_ai_message(response)
+                st.session_state.messages.append({"role": "assistant", "content": response})
+# Script entry point: open the vector DB, render the sidebar filters, then run the chatbot.
+if __name__ == "__main__":
+    db_client = get_db_client()
+    # The sidebar selection is both reported to the Agent and used as retriever filters.
+    params = get_parameters(db_client)
+    obj = HotelsSearchChatbot(db_client)
+    obj.main(params)

data/db/.lock ADDED Viewed

	@@ -0,0 +1 @@


1	+ tmp lock file

data/db/collection/hotels/storage.sqlite ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:deb2004afca01078aacc8036779b783e47f7f9c52d440a517b32eb81b892af97
+size 4726784

data/db/meta.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"collections": {"hotels": {"vectors": {"size": 3072, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null, "sparse_vectors": null}}, "aliases": {}}

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+langchain
+langchain-community
+langchain-openai
+qdrant-client
+openai
+cohere

src/__pycache__/prompts.cpython-310.pyc ADDED Viewed

Binary file (3.57 kB). View file

src/__pycache__/prompts.cpython-39.pyc ADDED Viewed

Binary file (3.36 kB). View file

src/__pycache__/rag.cpython-310.pyc ADDED Viewed

Binary file (2.6 kB). View file

src/__pycache__/rag.cpython-39.pyc ADDED Viewed

Binary file (3.07 kB). View file

src/__pycache__/retriever.cpython-310.pyc ADDED Viewed

Binary file (3.93 kB). View file

src/__pycache__/streamlit_utils.cpython-310.pyc ADDED Viewed

Binary file (2.54 kB). View file

src/__pycache__/streamlit_utils.cpython-39.pyc ADDED Viewed

Binary file (1.69 kB). View file

src/create_vector_db.py ADDED Viewed

	@@ -0,0 +1,279 @@

+import click
+from qdrant_client import QdrantClient, models
+from openai import OpenAI
+from tqdm import tqdm
+import json
+import requests
+import os
+from prompts import REVIEWS_SYSTEM_PROMPT, REVIEWS_USER_PROMPT
+# API key for the TripAdvisor Content API, read from the environment (None when the variable is unset).
+TRIPADVISOR_API_KEY = os.environ.get('TRIPADVISOR_API_KEY')
+def save_json(data, path):
+    """Write `data` to `path` as JSON, replacing an existing file.
+    Args:
+        data: JSON-serializable object
+        path (str): destination file
+    """
+    with open(path, "w") as sink:
+        json.dump(obj=data, fp=sink)
+def get_df(dataset_path, is_hf):
+    """Load the hotels dataset as a pandas DataFrame.
+    Args:
+        dataset_path (str): Hugging Face dataset name or path to a csv file
+        is_hf (bool): load the 'train' split with `datasets` when true, read a csv otherwise
+    Returns:
+        pd.DataFrame: the loaded dataset
+    """
+    if not is_hf:
+        import pandas as pd
+        return pd.read_csv(dataset_path)
+    # Imported lazily so that the csv path does not require the `datasets` package.
+    from datasets import load_dataset
+    return load_dataset(dataset_path)['train'].to_pandas()
+def _concat_reviews(df):
+    """Concatenate the titles and texts of the reviews in `df` into one string.
+    Each row starts with a newline, followed by an optional 'Title:' part and an optional
+    'Review:' part. Missing values (NaN/None) and empty strings are skipped; NaN is truthy,
+    so a plain truth test would try to concatenate a float to the text.
+    Args:
+        df (pd.DataFrame): reviews with the columns review_title and review_text
+    Returns:
+        str: concatenated reviews, '' for an empty frame
+    """
+    parts = []
+    for _, row in df.iterrows():
+        parts.append('\n')
+        if isinstance(row.review_title, str) and row.review_title:
+            parts.append('\nTitle:\n' + row.review_title)
+        if isinstance(row.review_text, str) and row.review_text:
+            parts.append('\nReview:\n' + row.review_text)
+    return ''.join(parts)
+def create_reviews_symmary(df, model, hotels, pos_rate=4.0, neg_rate=4.0, n_reviews=6):
+    """Create a summary of reviews for each hotel, based on the most positive and most negative reviews.
+    For every hotel the longest reviews of each category are concatenated and sent to the
+    chat model. Hotels without any review text get no entry in the result.
+    Args:
+        df (pd.DataFrame): hotels dataset
+        model (str): OpenAI model name
+        hotels (list): list of hotels to create summaries for
+        pos_rate (float): minimum positive rate, inclusive
+        neg_rate (float): maximum negative rate, exclusive
+        n_reviews (int): number of reviews to consider for each category
+    Returns:
+        dict: hotel name -> reviews summary
+    """
+    # Helper length columns are added to a copy so the caller's frame is left untouched.
+    reviews = df.assign(
+        review_text_len=df.review_text.str.len().fillna(value=0),
+        review_title_len=df.review_title.str.len().fillna(value=0),
+    )
+    client = OpenAI()
+    hotels_reviews_summary = {}
+    for hotel in tqdm(hotels):
+        temp = reviews[reviews.hotel_name.eq(hotel)]
+        temp_pos = temp[temp.rate >= pos_rate].nlargest(n_reviews, 'review_text_len')
+        temp_neg = temp[temp.rate < neg_rate].nlargest(n_reviews, 'review_text_len')
+        if len(temp_pos) == 0 and len(temp_neg) == 0:
+            # No rated reviews at all: fall back to the reviews with the longest titles.
+            temp_pos = temp.nlargest(n_reviews, 'review_title_len')
+        text = _concat_reviews(temp_pos) + _concat_reviews(temp_neg)
+        if text:
+            response = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": REVIEWS_SYSTEM_PROMPT},
+                    {"role": "user", "content": REVIEWS_USER_PROMPT.format(text=text)},
+                ]
+            )
+            hotels_reviews_summary[hotel] = response.choices[0].message.content
+    # Return only after every hotel has been processed.
+    return hotels_reviews_summary
+def _get_loc_id(hotel):
+    """ Given a hotel name, receive location id.
+    In order to get the hotel info, we need to get the location id first.
+    Args:
+        hotel (str): hotel name
+    Returns:
+        str: location id of the first search hit, or None when the lookup fails
+    """
+    url = "https://api.content.tripadvisor.com/api/v1/location/search"
+    # Passed via `params` so that requests URL-encodes the hotel name (spaces, '&', ...).
+    query = {"key": TRIPADVISOR_API_KEY, "searchQuery": hotel, "category": "hotels", "language": "en"}
+    headers = {"accept": "application/json"}
+    response = None
+    try:
+        response = requests.get(url, params=query, headers=headers, timeout=30)
+        return response.json()['data'][0]['location_id']
+    except Exception as e:
+        # Best effort: report the failure and let the caller skip this hotel.
+        if response is not None:
+            print(f'{response.status_code=}')
+            print(f'{response.text=}')
+        print(f'Error: {e}')
+        return None
+def get_hotel_info(hotel):
+    """Get hotel info from TripAdvisor.
+        The following information is retrieved using the TripAdvisor API:
+            - rank
+            - ratings distributions
+            - subratings
+            - amenities
+    Args:
+        hotel (str): hotel name
+    Returns:
+        dict: hotel info with the keys rank, reviews_ratings, subratings and amenities,
+            or None when the location id or the details could not be retrieved
+    """
+    url = "https://api.content.tripadvisor.com/api/v1/location/{loc_id}/details"
+    query = {"key": TRIPADVISOR_API_KEY, "language": "en", "currency": "USD"}
+    headers = {"accept": "application/json"}
+    loc_id = _get_loc_id(hotel)
+    if loc_id is None:
+        return None
+    response = None
+    try:
+        response = requests.get(url.format(loc_id=loc_id), params=query, headers=headers, timeout=30)
+        details = response.json()
+    except Exception as e:
+        # Best effort: report the failure and let the caller continue with the next hotel.
+        if response is not None:
+            print(f'{response.status_code=}')
+            print(f'{response.text=}')
+        print(f'Error: {e}')
+        return None
+    if not isinstance(details, dict):
+        return None
+    # Every section is optional: a payload without these keys must not raise.
+    rank = (details.get('ranking_data') or {}).get('ranking_string')
+    reviews_ratings = details.get('review_rating_count')
+    raw_subratings = details.get('subratings') or {}
+    subratings = {item['name']: item['value'] for item in raw_subratings.values()}
+    amenities = details.get('amenities', [])
+    return dict(
+        rank=rank,
+        reviews_ratings=reviews_ratings,
+        subratings=subratings,
+        amenities=amenities,
+    )
+def get_desc(hotel, data):
+    """Create a text description of the hotel based on the retrieved data from TripAdvisor.
+    The description consists of up to four sentences (rank, rating distribution, specific
+    ratings, amenities); a sentence is left out when its data is missing or empty.
+    Args:
+        hotel (str): hotel name
+        data (dict): hotel name -> hotel info (the info may be None)
+    Returns:
+        str: hotel text description, '' when there is no info for the hotel
+    """
+    info = data.get(hotel) if data else None
+    if not info:
+        return ''
+    # Sub-rating keys in the order in which they appear in the description.
+    labels = (
+        ('rate_location', 'Location'),
+        ('rate_sleep', 'Sleep'),
+        ('rate_room', 'Room'),
+        ('rate_service', 'Service'),
+        ('rate_cleanliness', 'Cleanliness'),
+    )
+    sentences = []
+    rank = info.get('rank')
+    if rank is not None:
+        sentences.append(f"Rating: {rank}.")
+    reviews_ratings = info.get('reviews_ratings') or {}
+    if reviews_ratings:
+        # Values are formatted individually, so numeric counts work as well as strings.
+        distribution = ", ".join(f"{key}: {value}" for key, value in reviews_ratings.items())
+        sentences.append(f"Rating distribution {distribution}.")
+    subratings = info.get('subratings') or {}
+    specific = [f"{label} {subratings[key]}" for key, label in labels if key in subratings]
+    if specific:
+        sentences.append("Specific ratings: " + ", ".join(specific) + ".")
+    amenities = info.get('amenities') or []
+    if amenities:
+        sentences.append("Amenities available: " + ", ".join(str(item) for item in amenities) + ".")
+    return " ".join(sentences)
+def get_payload(hotel, df):
+    """Build the metadata that is stored in the database next to the hotel's vector.
+    Rating, city, country and price are each the most frequent value among the rows of the
+    hotel; the price is the first space-separated token of `price_range`.
+    Args:
+        hotel (str): hotel name
+        df (pd.DataFrame): hotels dataset
+    Returns:
+        dict: metadata
+    """
+    rows = df[df.hotel_name.eq(hotel)]
+    def most_frequent(column):
+        return column.value_counts().index[0]
+    return {
+        'hotel_name': hotel,
+        'rating': most_frequent(rows.rating_value),
+        'city': most_frequent(rows.locality),
+        'country': most_frequent(rows.country),
+        'price': most_frequent(rows.price_range.str.split(' ').str[0]),
+    }
+@click.command()
+@click.option('--dataset-path', default='traversaal-ai-hackathon/hotel_datasets', help='Path to the dataset.')
+@click.option('--is-hf/--no-is-hf', default=True, help='Whether the dataset is in huggingface format, csv otherwise.')
+@click.option('--db-path', default='data/db', help='Path to the output database.')
+@click.option('--collection-name', default='hotels', help='Name of the collection in the database.')
+@click.option('--embeddings-model', default='text-embedding-3-large', help='Name of the model to use for embeddings.')
+@click.option('--embeddings-size', default=3072, help='Size of the embeddings.')
+@click.option('--reviews-model', default='gpt-3.5-turbo-0125', help='Name of the model to use for reviews summary.')
+def create_vector_db(dataset_path, is_hf, db_path, collection_name, embeddings_model, embeddings_size, reviews_model):
+    """Create or extend the Qdrant collection with one embedded description per hotel.
+
+    Hotels that are already stored in the collection are skipped.
+    """
+    REVIEW_SUMMARIES_PATH = 'reviews_summary.json'
+    HOTELS_INFO_PATH = 'hotels_info.json'
+    df = get_df(dataset_path, is_hf)
+    # Create a collection if it does not exist and filter out hotels that are already in the collection
+    qdrant_client = QdrantClient(path=db_path)
+    docs = []
+    if not qdrant_client.collection_exists(collection_name):
+        qdrant_client.create_collection(
+            collection_name=collection_name,
+            vectors_config=models.VectorParams(size=embeddings_size, distance=models.Distance.COSINE),
+        )
+    else:
+        docs, _ = qdrant_client.scroll(
+            collection_name=collection_name,
+            limit=int(1e9),  # `limit` is an integer parameter; large enough to read every point
+            with_payload=True,
+            with_vectors=False,
+        )
+    stored_hotels = {doc.payload['hotel_name'] for doc in docs}
+    # Keep the dataset order of the hotels that still have to be added.
+    hotels = [hotel for hotel in df.hotel_name.unique() if hotel not in stored_hotels]
+    if not hotels:
+        return
+    # Create reviews summary using OpenAI
+    reviews_summary = create_reviews_symmary(df, reviews_model, hotels)
+    save_json(reviews_summary, REVIEW_SUMMARIES_PATH)
+    # Get hotel info from TripAdvisor
+    hotels_info = {}
+    for hotel in tqdm(hotels):
+        hotels_info[hotel] = get_hotel_info(hotel)
+    save_json(hotels_info, HOTELS_INFO_PATH)
+    # Create descriptions and payloads for each hotel
+    texts = []
+    payloads = []
+    for hotel in hotels:
+        # get_hotel_info returns None when the TripAdvisor lookup failed.
+        trip_desc_hotel = get_desc(hotel, hotels_info) if hotels_info.get(hotel) else ''
+        review_hotel = reviews_summary.get(hotel)
+        payload = get_payload(hotel, df)
+        # Join whichever parts exist; chained conditional expressions would drop the review
+        # whenever the description is present.
+        text = '\n'.join(part for part in (trip_desc_hotel, review_hotel) if part)
+        payload['description'] = text
+        payloads.append(payload)
+        texts.append(text)
+    # Create description embeddings and upsert them to the database
+    openai_client = OpenAI()
+    embeddings = openai_client.embeddings.create(input=texts, model=embeddings_model)
+    # Continue after the highest stored id so an incremental run does not overwrite points.
+    # NOTE(review): assumes the stored ids are the integers assigned by this script - confirm.
+    first_id = max((doc.id for doc in docs), default=-1) + 1
+    points = [
+        models.PointStruct(
+            id=idx,
+            vector=data.embedding,
+            payload=payload,
+        )
+        for idx, (data, payload) in enumerate(zip(embeddings.data, payloads), start=first_id)
+    ]
+    qdrant_client.upsert(collection_name, points)
+# Script entry point: click parses the command-line options and calls create_vector_db.
+if __name__ == '__main__':
+    create_vector_db()

src/prompts.py ADDED Viewed

	@@ -0,0 +1,62 @@

+# System prompt for a recommendation-only assistant.
+# NOTE(review): not referenced by the other files visible in this commit view - confirm it is still needed.
+RAG_SYSTEM_PROMPT = "You are a helpful assistant, who recommends the hotels based only on my preferences."
+# Template for one retrieved hotel inside the RAG context; filled by Retriever._get_context
+# with the placeholders id, hotel_name and description.
+RAG_CONTEXT_TEMPLATE = """
+    {id}: {hotel_name}
+    {description}
+"""
+# User prompt that presents the retrieved hotels to the model; placeholders: context, query.
+RAG_USER_PROMPT = """
+    Here are the information about most relevant hotels to my query
+    ---------------------
+    {context}
+    ---------------------
+    Present these results to me and justify the ranking (explain why a hotel matches my preferences). Don't draw ANY conclusion and don't based on own knowledge.
+    Query: {query}
+    Answer:
+"""
+# Prompt that asks the Agent to pick one tool and its input; placeholder: input.
+# The "Action:" / "Action Input:" lines of the reply are parsed by HotelsSearchChatbot._get_action.
+AGENT_USER_PROMPT = """
+    Answer the following question as best you can. You have access to the following tools:
+    hotels_data_base: A tool which present information about most relevant hotels based on the query. The information contains pros and cons of the hotel based on reviews, reviews ratings and ammenities. It is usefull when user want to get hotels recommendations. In this case Action Input should be query which will be complete and usefull to retrive the most relevant hotels.
+    ares_api: An API which performs real-time internet searches. It can be usefull than you need specific information about the hotel or the locataion or smth else from the internet. In this case Action Input should be query which will be complete and usefull to retrive the information from the Internet.
+    nothing: If you are sure you can answer the user's query without additional tools. In this case Action Input should be just an answer.
+    Use the following format:
+    Question: the input question you must answer
+    Thought: you should always think about what to do
+    Action: the action to take, should be one of [hotels_data_base, ares_api, nothing]
+    Action Input: the input to the action
+    Begin!
+    Question: {input}
+    Thought:
+"""
+# System prompt of the Agent chain built in HotelsSearchChatbot.setup_chain.
+AGENT_SYSTEM_PROMPT = "You are a helpful assistant for a hotel recommendation system based on my preferences. Answer all questions to the best of your ability."
+# System prompt used by create_reviews_symmary when summarising hotel reviews.
+REVIEWS_SYSTEM_PROMPT = "You are a helpful assistant. Your goal is to underpin the strong and the weak points (features, amenities). If you can't find strong or weak points, don't write ANYTHING about them. The information consists of hotel reviews, i.e. Title of the review and the Review itself."
+# User prompt for the review summary: the concatenated reviews followed by an example
+# of the expected output format; placeholder: text.
+REVIEWS_USER_PROMPT = """{text} Good Example:
+    ### Strong Points:
+    - The hotel boasts a favorable location with sea views and proximity to Zeitinburnu train station.
+    - Upgraded rooms, fitness facilities, and the outdoor pool area are well-received.
+    - The staff, including specific individuals like Mr. Levent, Cihan, and Buse, have been commended for their service.
+    - Room cleanliness is frequently mentioned as a positive aspect.
+    ### Weak Points:
+    - Inconsistency in customer service, with some guests reporting a lack of assistance with luggage and unfriendly reception.
+    - Miscommunication regarding room rates and issues with overcharges.
+    - Some guests have found the hotel's amenities, such as the narrow balcony and the pool's restrictive rules, to be lacking.
+    - A few guests reported cleanliness issues in the bathroom and concerns with room repairs.
+"""
+# User prompt that wraps the text returned by the Traversaal Ares API; placeholders: context, query.
+TRAVERSIALAI_USER_PROMPT = """
+    Based on the information retrived from the internet, answer the following question as best you can.
+    ---------------------
+    {context}
+    ---------------------
+    Query: {query}
+    Answer:
+"""

src/retriever.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from openai import OpenAI
+import cohere
+from qdrant_client import models
+from src.prompts import RAG_CONTEXT_TEMPLATE
+class Retriever:
+    """Retriever class for retrieving documents from the database
+        For retrieving documents, the following steps are performed:
+            1. Create an embedding for the query
+            2. Get n documents from the database based on the query and filters (Mixed retrieval)
+            3. Rerank the documents based on the query and select top k documents, where k << n (ReRanking)
+            4. Create a context from the selected documents
+    """
+    def __init__(self, embedding_model, llm_model, rerank_model, db_client, db_collection='hotels'):
+        """Create the OpenAI and Cohere clients and store the retrieval settings.
+        Args:
+            embedding_model (str): OpenAI embedding model name
+            llm_model (str): chat model name (stored, not used by the methods of this class)
+            rerank_model (str): Cohere rerank model name
+            db_client: Qdrant client
+            db_collection (str): name of the collection to search
+        """
+        self.db_collection = db_collection
+        self.db_client = db_client
+        self.rerank_model = rerank_model
+        self.openai_client = OpenAI()
+        self.co = cohere.Client()
+        self.embedding_model = embedding_model
+        self.llm_model = llm_model
+        # Minimum number of candidates fetched from the vector DB before reranking.
+        self.max_retrieved_docs = 13
+    def _get_documents(self, query, top_k, city, price, rating):
+        """Retrieve top n documents from the database based on the query and filters
+        Args:
+            query (str): query
+            top_k (int): number of documents to retrieve
+            city (str): city name
+            price (str): price range
+            rating (float): rating
+        Returns:
+            list: list of documents
+        """
+        embedding = self.openai_client.embeddings.create(input=query, model=self.embedding_model)
+        # Only the filters that were actually provided become conditions.
+        conditions = []
+        if city:
+            conditions.append(models.FieldCondition(key="city", match=models.MatchValue(value=city)))
+        if price:
+            conditions.append(models.FieldCondition(key="price", match=models.MatchValue(value=price)))
+        if rating:
+            conditions.append(models.FieldCondition(key="rating", range=models.Range(gte=rating)))
+        response = self.db_client.search(
+            collection_name=self.db_collection,
+            query_vector=embedding.data[0].embedding,
+            limit=top_k,
+            query_filter=models.Filter(
+                must=conditions
+            ),
+        )
+        return response
+    def _get_context(self, docs):
+        """Create a context from the retrieved documents
+        Args:
+            docs (list): list of documents
+        Returns:
+            str: context
+        """
+        return ''.join(
+            RAG_CONTEXT_TEMPLATE.format(id=i, hotel_name=doc.payload['hotel_name'], description=doc.payload['description'])
+            for i, doc in enumerate(docs, 1)
+        )
+    def _reranker(self, docs, query, top_k):
+        """Rerank the retrieved documents using Cohere based on the query and select top k documents
+        Args:
+            docs (list): list of documents
+            query (str): query
+            top_k (int): number of documents to select
+        Returns:
+            list: list of reranked documents
+        """
+        texts = [doc.payload['description'] for doc in docs]
+        rerank_hits = self.co.rerank(query=query, documents=texts, top_n=top_k, model=self.rerank_model)
+        # Some cohere SDK versions return a response object that exposes the hits as `.results`
+        # and is not itself sliceable; fall back to the object itself otherwise.
+        hits = getattr(rerank_hits, 'results', rerank_hits)
+        return [docs[hit.index] for hit in hits[:top_k]]
+    def __call__(self, query, top_k=3, city=None, price=None, rating=None):
+        """Run the whole pipeline: filtered vector search, reranking and context creation.
+        Args:
+            query (str): query
+            top_k (int): number of hotels in the context
+            city (str): city filter, None for any city
+            price (str): price filter, None for any price
+            rating (float): minimum rating, None for no restriction
+        Returns:
+            str: context for the LLM, or a fixed message when no hotel matches the filters
+        """
+        docs = self._get_documents(query, top_k=max(self.max_retrieved_docs, top_k), city=city, price=price, rating=rating)
+        if len(docs) == 0:
+            return 'There are no such hotels'
+        docs = self._reranker(docs, query, top_k)
+        context = self._get_context(docs)
+        return context

src/streamlit_utils.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import streamlit as st
+from langchain.callbacks.base import BaseCallbackHandler
+class StreamHandler(BaseCallbackHandler):
+    """Callback handler that accumulates the streamed tokens and renders them in a container."""
+    def __init__(self, container, initial_text=""):
+        """Remember the target container and the text to start from."""
+        self.text = initial_text
+        self.container = container
+    def on_llm_new_token(self, token: str, **kwargs) -> None:
+        """Append `token` to the text received so far and redraw it as markdown."""
+        accumulated = self.text + token
+        self.text = accumulated
+        self.container.markdown(accumulated)
+def enable_chat_history(func):
+    """Decorator that prepares and displays the chat history stored in the session state.
+    The history handling below runs when the decorator is applied (not on each call of the
+    wrapped function) and only when OPENAI_API_KEY is set in the environment.
+    Args:
+        func (callable): function to wrap
+    Returns:
+        callable: wrapper that forwards all arguments to `func` and returns its result
+    """
+    import functools
+    if os.environ.get("OPENAI_API_KEY"):
+        # to clear chat history after switching chatbot
+        current_page = func.__qualname__
+        if "current_page" not in st.session_state:
+            st.session_state["current_page"] = current_page
+        if st.session_state["current_page"] != current_page:
+            try:
+                st.cache_resource.clear()
+                del st.session_state["current_page"]
+                del st.session_state["messages"]
+            except Exception:
+                # Best-effort reset: a missing key or a failing cache clear must not break the page.
+                pass
+        # to show chat history on ui
+        if "messages" not in st.session_state:
+            st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
+        for msg in st.session_state["messages"]:
+            st.chat_message(msg["role"]).write(msg["content"])
+    @functools.wraps(func)
+    def execute(*args, **kwargs):
+        return func(*args, **kwargs)
+    return execute
+def display_msg(msg, author):
+    """Store a message in the session's chat history and render it in the UI.
+    Args:
+        msg (str): message to display
+        author (str): author of the message -user/assistant
+    """
+    entry = {"role": author, "content": msg}
+    st.session_state.messages.append(entry)
+    bubble = st.chat_message(author)
+    bubble.write(msg)
+def configure_api_keys():
+    """Ask for the required API keys in the sidebar and export them to the environment.
+    A key found in the environment pre-fills its input. Each entered key is stored in the
+    session state and in `os.environ`; the script run is stopped at the first missing key.
+    """
+    # CSS that hides the "Show password text" button of the password inputs.
+    hide_toggle_css = """
+    <style>
+        [title="Show password text"] {
+            display: none;
+        }
+    </style>
+    """
+    st.sidebar.header('Api Keys Configuration')
+    st.markdown(hide_toggle_css, unsafe_allow_html=True)
+    for key in ('OPENAI_API_KEY', 'CO_API_KEY', 'ARES_API_KEY'):
+        if key in os.environ:
+            st.session_state[key] = os.environ[key]
+        if key in st.session_state:
+            remembered = st.session_state[key]
+        else:
+            remembered = ''
+        api_key = st.sidebar.text_input(label=key, type="password", value=remembered, placeholder="...")
+        if not api_key:
+            st.error(f"Please add your {key} to continue.")
+            st.stop()
+            continue
+        st.session_state[key] = api_key
+        os.environ[key] = api_key