Spaces:

bertugmirasyedi
/

aristotle-api

Sleeping

App Files Files Community

bertugmirasyedi committed on Apr 1, 2023

Commit

f9baad9

1 Parent(s): 079594f

Split the single endpoint into a separate endpoint for each function.

Browse files

Files changed (3) hide show

.DS_Store +0 -0
__pycache__/app.cpython-310.pyc +0 -0
app.py +192 -90

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

__pycache__/app.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.middleware.cors import CORSMiddleware
 # Define the FastAPI app
 app = FastAPI(docs_url="/")
@@ -14,16 +15,18 @@ app.add_middleware(
     allow_headers=["*"],
 )
 @app.get("/search")
-def search(
     query: str,
-    classification: bool = True,
-    summarization: bool = True,
-    similarity: bool = False,
     add_chatgpt_results: bool = False,
     n_results: int = 10,
 ):
     import time
     import requests
@@ -42,7 +45,12 @@ def search(
         """
         # Set the API endpoint and query parameters
         url = "https://www.googleapis.com/books/v1/volumes"
-        params = {"q": str(query), "printType": "books", "maxResults": n_results}
         # Send a GET request to the API with the specified parameters
         response = requests.get(url, params=params)
@@ -132,32 +140,41 @@ def search(
         images = []
         # Get the titles, descriptions, and publishers and append them to the lists
-        for result in openalex_results[0]:
-            try:
-                titles.append(result["title"])
-            except KeyError:
-                titles.append("Null")
-            try:
-                descriptions.append(result["abstract"])
-            except KeyError:
-                descriptions.append("Null")
-            try:
-                publishers.append(result["host_venue"]["publisher"])
-            except KeyError:
-                publishers.append("Null")
-            try:
-                authors.append(result["authorships"][0]["author"]["display_name"])
-            except KeyError:
-                authors.append("Null")
             images.append(
                 "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
             )
-            return titles, authors, publishers, descriptions, images
     # Run the openalex_search function
     (
@@ -192,8 +209,6 @@ def search(
         descriptions = []
         images = []
-        # Set the OpenAI API key
-        openai.api_key = "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"
         # Set the OpenAI API key
         openai.api_key = "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"
@@ -276,85 +291,172 @@ def search(
     third_checkpoint = time.time()
     third_checkpoint_time = int(third_checkpoint - second_checkpoint)
     # Combine title, description, and publisher into a single string
     combined_data = [
         f"The book's title is {title}. It is published by {publisher}. This book is about {description}"
         for title, description, publisher in zip(titles, descriptions, publishers)
     ]
-    def find_similar(combined_data, top_k=10):
-        """
-        Calculate the similarity between the books and return the top_k results.
-        """
-        from sentence_transformers import SentenceTransformer
-        from sentence_transformers import util
-        sentence_transformer = SentenceTransformer("all-MiniLM-L6-v2")
-        book_embeddings = sentence_transformer.encode(
-            combined_data, convert_to_tensor=True
         )
-        # Make sure that the top_k value is not greater than the number of books
-        top_k = len(combined_data) if top_k > len(combined_data) else top_k
-        similar_books = []
-        for i in range(len(combined_data)):
-            # Get the embedding for the ith book
-            current_embedding = book_embeddings[i]
-            # Calculate the similarity between the ith book and the rest of the books
-            similarity_sorted = util.semantic_search(
-                current_embedding, book_embeddings, top_k=top_k
-            )
-            # Append the results to the list
-            similar_books.append(
-                {
-                    "sorted_by_similarity": similarity_sorted[0][1:],
-                }
-            )
-        return similar_books
-    def summarize(descriptions, runtime="normal"):
-        """
-        Summarize the descriptions and return the results.
-        """
-        from transformers import (
-            AutoTokenizer,
-            AutoModelForSeq2SeqLM,
-            pipeline,
-        )
-        from optimum.onnxruntime import ORTModelForSeq2SeqLM
-        from optimum.bettertransformer import BetterTransformer
-        # Define the summarizer model and tokenizer
-        if runtime == "normal":
-            tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
-            model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
-            model = BetterTransformer.transform(model)
-        elif runtime == "onnxruntime":
-            tokenizer = AutoTokenizer.from_pretrained("optimum/t5-small")
-            model = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")
-        # Create the summarizer pipeline
-        summarizer_pipe = pipeline(
-            "summarization",
-            model=model,
-            tokenizer=tokenizer,
-            min_length=10,
-            max_length=128,
         )
-        # Summarize the descriptions
-        summaries = [
-            summarizer_pipe(description)
-            if (len(description) > 0)
-            else [{"summary_text": "No summary text is available."}]
-            for description in descriptions
-        ]
-        return summaries
     def classify(combined_data, runtime="normal"):
         """

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.encoders import jsonable_encoder
 # Define the FastAPI app
 app = FastAPI(docs_url="/")
     allow_headers=["*"],
 )
import os

# Google Books API key that /search sends as the "key" query parameter.
# SECURITY(review): this value was committed to source control, so it should be
# treated as leaked — rotate it and supply the replacement through the
# GOOGLE_BOOKS_API_KEY environment variable. The literal is kept only as a
# fallback so existing deployments keep working until that is done.
key = os.environ.get(
    "GOOGLE_BOOKS_API_KEY", "AIzaSyCEiSxvAfXHAXNE2Q5b95vBpwjlbjl5GO8"
)
 @app.get("/search")
+async def search(
     query: str,
     add_chatgpt_results: bool = False,
     n_results: int = 10,
 ):
+    """
+    Get the results from the Google Books API, OpenAlex, and optionally OpenAI.
+    """
     import time
     import requests
         """
         # Set the API endpoint and query parameters
         url = "https://www.googleapis.com/books/v1/volumes"
+        params = {
+            "q": str(query),
+            "printType": "books",
+            "maxResults": n_results,
+            "key": key,
+        }
         # Send a GET request to the API with the specified parameters
         response = requests.get(url, params=params)
         images = []
         # Get the titles, descriptions, and publishers and append them to the lists
+        try:
+            for result in openalex_results[0]:
+                try:
+                    titles.append(result["title"])
+                except KeyError:
+                    titles.append("Null")
+                try:
+                    descriptions.append(result["abstract"])
+                except KeyError:
+                    descriptions.append("Null")
+                try:
+                    publishers.append(result["host_venue"]["publisher"])
+                except KeyError:
+                    publishers.append("Null")
+                try:
+                    authors.append(result["authorships"][0]["author"]["display_name"])
+                except KeyError:
+                    authors.append("Null")
+                images.append(
+                    "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
+                )
+        except IndexError:
+            titles.append("Null")
+            descriptions.append("Null")
+            publishers.append("Null")
+            authors.append("Null")
             images.append(
                 "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
             )
+        return titles, authors, publishers, descriptions, images
     # Run the openalex_search function
     (
         descriptions = []
         images = []
         # Set the OpenAI API key
         openai.api_key = "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"
     third_checkpoint = time.time()
     third_checkpoint_time = int(third_checkpoint - second_checkpoint)
+    results = [
+        {
+            "title": title,
+            "author": author,
+            "publisher": publisher,
+            "description": description,
+            "image": image,
+        }
+        for title, author, publisher, description, image in zip(
+            titles, authors, publishers, descriptions, images
+        )
+    ]
+    response = {"results": results}
+    return response
@app.post("/classify")
async def classify(data: dict, runtime: str = "normal"):
    """
    Create classifier pipeline and return the results.

    Each book in ``data["results"]`` is turned into one descriptive sentence
    and run through a zero-shot classification pipeline twice: once against
    the audience labels and once against the level labels.

    Args:
        data: JSON body holding a ``"results"`` list whose entries provide
            ``"title"``, ``"description"`` and ``"publisher"`` (the shape the
            ``/search`` endpoint returns).
        runtime: ``"normal"`` loads the DeBERTa NLI checkpoint through
            transformers; ``"onnxruntime"`` loads the DistilBERT MNLI
            checkpoint through optimum's ONNX Runtime model class.

    Returns:
        One dict per book, in input order, with ``"audience"`` (the first
        label the pipeline returns for the audience set) and ``"level"``
        (the first score the pipeline returns for the level set).

    Raises:
        HTTPException: 400 when ``runtime`` is not supported or the body does
            not have the expected shape.
    """
    from fastapi import HTTPException

    # Fail fast on an unknown runtime: without this check `model` and
    # `tokenizer` would never be bound and the request would die with an
    # UnboundLocalError (an opaque 500) once the pipeline is built.
    if runtime not in ("normal", "onnxruntime"):
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported runtime {runtime!r}; use 'normal' or 'onnxruntime'.",
        )

    # Combine title, description, and publisher into a single string
    combined_data = []
    try:
        for book in data["results"]:
            title = book["title"]
            description = book["description"]
            publisher = book["publisher"]
            combined_data.append(
                f"The book's title is {title}. It is published by {publisher}. This book is about {description}"
            )
    except (KeyError, TypeError) as err:
        raise HTTPException(
            status_code=400,
            detail=(
                'Body must look like {"results": [{"title": ..., '
                '"description": ..., "publisher": ...}, ...]}.'
            ),
        ) from err

    # Nothing to classify: skip the expensive model load entirely.
    if not combined_data:
        return []

    from transformers import AutoTokenizer, pipeline

    # Import only the model class the selected runtime needs, so a missing
    # optional backend cannot break the other runtime.
    if runtime == "normal":
        from transformers import AutoModelForSequenceClassification

        # Define the zero-shot classifier
        checkpoint = "sileod/deberta-v3-base-tasksource-nli"
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    else:
        from optimum.onnxruntime import ORTModelForSequenceClassification

        checkpoint = "optimum/distilbert-base-uncased-mnli"
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = ORTModelForSequenceClassification.from_pretrained(checkpoint)

    classifier_pipe = pipeline(
        "zero-shot-classification",
        model=model,
        tokenizer=tokenizer,
        hypothesis_template="This book is {}.",
        batch_size=1,
        device=-1,
        multi_label=False,
    )

    # Define the candidate labels
    level = [
        "Introductory",
        "Advanced",
    ]
    audience = ["Academic", "Not Academic", "Manual"]

    classes = [
        {
            "audience": classifier_pipe(doc, audience)["labels"][0],
            # NOTE(review): unlike "audience", this returns a score rather
            # than a label, so the response never says which level it belongs
            # to. Kept unchanged because clients may rely on the numeric
            # value — confirm the intent.
            "level": classifier_pipe(doc, level)["scores"][0],
        }
        for doc in combined_data
    ]
    return classes
@app.post("/find_similar")
async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
    """
    Calculate the similarity between the books and return the top_k results.

    Every book in ``data["results"]`` is embedded with the
    ``all-MiniLM-L6-v2`` sentence transformer and compared against all the
    others with ``util.semantic_search``.

    Args:
        data: JSON body holding a ``"results"`` list whose entries provide
            ``"title"``, ``"description"`` and ``"publisher"`` (the shape the
            ``/search`` endpoint returns).
        runtime: Accepted for signature parity with the other endpoints; it
            is not used here.
        top_k: Maximum number of *other* books to return per book. Values
            above ``len(results) - 1`` are capped; values below zero are
            treated as zero.

    Returns:
        ``{"results": [...]}`` with one entry per input book, in input order.
        Each entry is ``{"sorted_by_similarity": hits}`` where ``hits`` are
        the ``semantic_search`` hit dicts for the other books, in the order
        ``semantic_search`` returned them.

    Raises:
        HTTPException: 400 when the body does not have the expected shape.
    """
    from fastapi import HTTPException

    # Combine title, description, and publisher into a single string
    combined_data = []
    try:
        for book in data["results"]:
            title = book["title"]
            description = book["description"]
            publisher = book["publisher"]
            combined_data.append(
                f"The book's title is {title}. It is published by {publisher}. This book is about {description}"
            )
    except (KeyError, TypeError) as err:
        raise HTTPException(
            status_code=400,
            detail=(
                'Body must look like {"results": [{"title": ..., '
                '"description": ..., "publisher": ...}, ...]}.'
            ),
        ) from err

    # A book can have at most len - 1 neighbours; never go below zero.
    top_k = max(0, min(top_k, len(combined_data) - 1))
    if top_k == 0:
        # Covers empty input, a single book, and top_k <= 0: there is nothing
        # to compare, so skip loading the model and encoding.
        return {"results": [{"sorted_by_similarity": []} for _ in combined_data]}

    from sentence_transformers import SentenceTransformer
    from sentence_transformers import util

    sentence_transformer = SentenceTransformer("all-MiniLM-L6-v2")
    book_embeddings = sentence_transformer.encode(combined_data, convert_to_tensor=True)

    # Query every book against the whole corpus in one call. Each book also
    # matches itself, so ask for one extra hit and drop the self-match below;
    # requesting exactly top_k and slicing off the first hit would return only
    # top_k - 1 neighbours.
    hits_per_book = util.semantic_search(
        book_embeddings, book_embeddings, top_k=top_k + 1
    )

    similar_books = [
        {
            # Filter by corpus_id rather than position: with duplicate or tied
            # texts the self-match is not guaranteed to be the first hit.
            "sorted_by_similarity": [
                hit for hit in hits if hit["corpus_id"] != index
            ][:top_k],
        }
        for index, hits in enumerate(hits_per_book)
    ]

    response = {"results": similar_books}
    return response
@app.post("/summarize")
async def summarize(descriptions: list, runtime: str = "normal"):
    """
    Summarize the descriptions and return the results.

    Args:
        descriptions: Book descriptions to summarize. Entries that are not
            non-empty strings, or that equal the ``"Null"`` placeholder, are
            not sent to the model.
        runtime: ``"normal"`` loads ``lidiya/bart-base-samsum`` through
            transformers and wraps it with BetterTransformer;
            ``"onnxruntime"`` loads ``optimum/t5-small`` through optimum's
            ONNX Runtime model class.

    Returns:
        One entry per description, in input order. Each entry is the
        summarization pipeline's output for that description, or
        ``[{"summary_text": "No summary text is available."}]`` when the
        description was skipped.

    Raises:
        HTTPException: 400 when ``runtime`` is not supported.
    """
    from fastapi import HTTPException

    # Fail fast on an unknown runtime: without this check `model` and
    # `tokenizer` would never be bound and the request would die with an
    # UnboundLocalError (an opaque 500) once the pipeline is built.
    if runtime not in ("normal", "onnxruntime"):
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported runtime {runtime!r}; use 'normal' or 'onnxruntime'.",
        )

    def _has_text(description):
        # The isinstance guard keeps JSON nulls and numbers from raising a
        # TypeError on len(); they get the placeholder summary instead.
        return (
            isinstance(description, str)
            and len(description) > 0
            and description != "Null"
        )

    # Nothing summarizable: skip the expensive model load entirely.
    if not any(_has_text(description) for description in descriptions):
        return [
            [{"summary_text": "No summary text is available."}]
            for _ in descriptions
        ]

    from transformers import AutoTokenizer, pipeline

    # Define the summarizer model and tokenizer. Only the classes the selected
    # runtime needs are imported, so a missing optional backend cannot break
    # the other runtime.
    if runtime == "normal":
        from transformers import AutoModelForSeq2SeqLM
        from optimum.bettertransformer import BetterTransformer

        tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
        model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
        model = BetterTransformer.transform(model)
    else:
        from optimum.onnxruntime import ORTModelForSeq2SeqLM

        tokenizer = AutoTokenizer.from_pretrained("optimum/t5-small")
        model = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")

    # Create the summarizer pipeline
    summarizer_pipe = pipeline("summarization", model=model, tokenizer=tokenizer)

    # Summarize the descriptions. truncation=True clips inputs that exceed the
    # model's maximum input length instead of letting the request fail.
    summaries = [
        summarizer_pipe(description, truncation=True)
        if _has_text(description)
        else [{"summary_text": "No summary text is available."}]
        for description in descriptions
    ]
    return summaries
     def classify(combined_data, runtime="normal"):
         """