Spaces:

Marroco93
/

PacmanAI-2

Sleeping

File size: 6,048 Bytes

44cdc71
c108da3
caa64e7
c108da3
f84e083
 
 
bcee5ff
c108da3
7a31970
4849bdc
506bda4
04ac801
10d17a3
8737454
10d17a3
1b8c3e7
4849bdc
c108da3
4849bdc
f84e083
 
 
ce8dee8
d0435f3
f12ecf0
1667997
f84e083
 
 
 
e40242b
6b74d17
f84e083
 
 
1667997
 
 
 
 
 
 
 
 
 
c108da3
9441c54
c108da3
1667997
 
 
 
 
 
 
9441c54
 
1667997
9441c54
 
 
1667997
9441c54
 
c108da3
9441c54
c108da3
d0c61b6
215f4a9
c108da3
215f4a9
d0c61b6
f84e083
1667997
 
 
 
f84e083
 
c108da3
9441c54
d0c61b6
bcee5ff
 
1b8c3e7
10d17a3
bcee5ff
 
1b8c3e7
3717137
1b8c3e7
fe81f5c
 
 
1b8c3e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcee5ff
1b8c3e7
 
bcee5ff
 
 
fe81f5c
 
bcee5ff
fe81f5c
 
 
 
b95f5d7
10d17a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8737454
10d17a3
 
 
27153aa
10d17a3

import re
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import uvicorn
from typing import Generator, List
import json  # Asegúrate de que esta línea esté al principio del archivo
import nltk
import os
import google.protobuf  # This line should execute without errors if protobuf is installed correctly
import sentencepiece
from transformers import pipeline, AutoTokenizer,AutoModelForSequenceClassification,AutoModel
import spacy
import numpy as np
import torch


nltk.data.path.append(os.getenv('NLTK_DATA'))

app = FastAPI()

# Initialize the InferenceClient with your model
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


class Item(BaseModel):
    prompt: str
    history: list
    system_prompt: str
    temperature: float = 0.8
    max_new_tokens: int = 4000
    top_p: float = 0.15
    repetition_penalty: float = 1.0

def format_prompt(current_prompt, history):
    formatted_history = "<s>"
    for entry in history:
        if entry["role"] == "user":
            formatted_history += f"[USER] {entry['content']} [/USER]"
        elif entry["role"] == "assistant":
            formatted_history += f"[ASSISTANT] {entry['content']} [/ASSISTANT]"
    formatted_history += f"[USER] {current_prompt} [/USER]</s>"
    return formatted_history


def generate_stream(item: Item) -> Generator[bytes, None, None]:
    formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
    # Estimate token count for the formatted_prompt
    input_token_count = len(nltk.word_tokenize(formatted_prompt))  # NLTK tokenization

    # Ensure total token count doesn't exceed the maximum limit
    max_tokens_allowed = 32768
    max_new_tokens_adjusted = max(1, min(item.max_new_tokens, max_tokens_allowed - input_token_count))

    generate_kwargs = {
        "temperature": item.temperature,
        "max_new_tokens": max_new_tokens_adjusted,
        "top_p": item.top_p,
        "repetition_penalty": item.repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }

    # Stream the response from the InferenceClient
    for response in client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True):
        # This assumes 'details=True' gives you a structure where you can access the text like this
        chunk = {
            "text": response.token.text,
            "complete": response.generated_text is not None  # Adjust based on how you detect completion
        }
        yield json.dumps(chunk).encode("utf-8") + b"\n"


class SummarizeRequest(BaseModel):
    text: str

@app.post("/generate/")
async def generate_text(item: Item):
    # Stream response back to the client
    return StreamingResponse(generate_stream(item), media_type="application/x-ndjson")

# Define request model
class TextRequest(BaseModel):
    text: str  # Single string of long text

# Load Longformer model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("allenai/longformer-base-4096")
model = AutoModel.from_pretrained("allenai/longformer-base-4096")

# Endpoint to process the document and return embeddings
@app.post("/process_document")
async def process_document(request: TextRequest):
    try:
        # Split the text into segments that fit within the model's max input size
        max_length = 4096  # Maximum token length for Longformer
        words = request.text.split()
        tokens = tokenizer.encode(request.text, add_special_tokens=True)
        input_ids = []
        current_chunk = []

        for token in tokens:
            if len(current_chunk) + len(tokenizer.convert_ids_to_tokens([token])) < max_length:
                current_chunk.append(token)
            else:
                input_ids.append(current_chunk)
                current_chunk = [token]

        if current_chunk:
            input_ids.append(current_chunk)  # Add the last chunk if any

        # Generate embeddings for each segment
        embeddings_list = []
        for ids in input_ids:
            inputs = {'input_ids': torch.tensor(ids).unsqueeze(0)}  # Batch size 1
            outputs = model(**inputs)
            embeddings = outputs.last_hidden_state.mean(dim=1).detach().numpy()
            embeddings_list.append(embeddings.tolist())  # Store embeddings for each segment

        return {
            "embeddings": embeddings_list
        }
    except Exception as e:
        print(f"Error during document processing: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# @app.post("/summarize")
# async def summarize(request: TextRequest):
#     try:
#         # Preprocess and segment the text
#         processed_text = preprocess_text(request.text)
#         segments = segment_text(processed_text)

#         # Classify each segment safely
#         classified_segments = []
#         for segment in segments:
#             try:
#                 result = classifier(segment)
#                 classified_segments.append(result)
#             except Exception as e:
#                 print(f"Error classifying segment: {e}")
#                 classified_segments.append({"error": str(e)})

#         # Optional: Reduce tokens or summarize
#         reduced_texts = []
#         for segment in segments:
#             try:
#                 reduced_text, token_count = reduce_tokens(segment)
#                 reduced_texts.append((reduced_text, token_count))
#             except Exception as e:
#                 print(f"Error during token reduction: {e}")
#                 reduced_texts.append(("Error", 0))

#         return {
#             "classified_segments": classified_segments,
#             "reduced_texts": reduced_texts
#         }
    
#     except Exception as e:
#         print(f"Error during token reduction: {e}")
#         raise HTTPException(status_code=500, detail=str(e))

# if __name__ == "__main__":
#     uvicorn.run(app, host="0.0.0.0", port=8000)