Spaces:
Sleeping
Sleeping
no message
Browse files
main.py
CHANGED
@@ -96,12 +96,23 @@ def split_text_by_tokens(text, max_tokens=1024):
|
|
96 |
print("Tokenization complete.")
|
97 |
return chunks, token_counts
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
@app.post("/summarize")
|
100 |
async def summarize_text(request: SummarizeRequest):
|
101 |
try:
|
102 |
-
|
103 |
-
|
104 |
-
return JSONResponse(content={"chunks": chunk_data})
|
105 |
except Exception as e:
|
106 |
print(f"Error during tokenization: {e}")
|
107 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
96 |
print("Tokenization complete.")
|
97 |
return chunks, token_counts
|
98 |
|
99 |
+
def summarize_large_text(text, max_tokens=1024, summary_max_length=500, summary_min_length=100):
    """Summarize arbitrarily long text by splitting it into token-limited chunks.

    Each chunk is summarized independently and the partial summaries are
    joined into one string.

    Args:
        text: Input text to summarize.
        max_tokens: Per-chunk token budget; a small buffer is subtracted
            below so a chunk never sits exactly at the model's hard limit.
        summary_max_length: ``max_length`` passed to the summarizer per chunk.
        summary_min_length: ``min_length`` passed to the summarizer per chunk.

    Returns:
        The per-chunk summaries joined by single spaces; an empty string
        when the input produces no chunks.
    """
    # Slight buffer below the model limit to avoid edge cases.
    # token_counts from the splitter is not needed here, so discard it.
    chunks, _token_counts = split_text_by_tokens(text, max_tokens=max_tokens - 10)
    summaries = []
    for chunk in chunks:
        # Perform summarization on the chunk.
        summary = summarizer(
            chunk,
            max_length=summary_max_length,
            min_length=summary_min_length,
            do_sample=False,
        )
        # Guard against an empty/None pipeline result for a chunk.
        if summary:
            summaries.append(summary[0]['summary_text'])
    return ' '.join(summaries)
|
109 |
+
|
110 |
+
|
111 |
@app.post("/summarize")
async def summarize_text(request: SummarizeRequest):
    """Summarize the text in the request body.

    Args:
        request: Request model carrying the ``text`` field to summarize.

    Returns:
        JSONResponse with ``{"summary": <combined summary string>}``.

    Raises:
        HTTPException: 500 with the underlying error message if
            summarization fails for any reason.
    """
    try:
        summarized_text = summarize_large_text(request.text)
        return JSONResponse(content={"summary": summarized_text})
    except Exception as e:
        # Fixed: the original message said "tokenization", which is
        # misleading — the failing operation here is summarization.
        print(f"Error during summarization: {e}")
        # Chain the original exception so the traceback keeps its cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
|