Marroco93 committed
Commit
d33d65c
1 Parent(s): c1fff5f

no message

Files changed (1)
  1. main.py +14 -3
main.py CHANGED
@@ -96,12 +96,23 @@ def split_text_by_tokens(text, max_tokens=1024):
     print("Tokenization complete.")
     return chunks, token_counts
 
+def summarize_large_text(text):
+    chunks, token_counts = split_text_by_tokens(text, max_tokens=1024 - 10)  # Slight buffer to avoid edge cases
+    summaries = []
+    for chunk in chunks:
+        # Perform summarization on the chunk
+        summary = summarizer(chunk, max_length=500, min_length=100, do_sample=False)
+        if summary:
+            summaries.append(summary[0]['summary_text'])
+    combined_summary = ' '.join(summaries)
+    return combined_summary
+
+
 @app.post("/summarize")
 async def summarize_text(request: SummarizeRequest):
     try:
-        chunks, token_counts = split_text_by_tokens(request.text, max_tokens=1024 - 10)  # Slight buffer to avoid edge cases
-        chunk_data = [{'chunk': chunk, 'tokens': count} for chunk, count in zip(chunks, token_counts)]
-        return JSONResponse(content={"chunks": chunk_data})
+        summarized_text = summarize_large_text(request.text)
+        return JSONResponse(content={"summary": summarized_text})
     except Exception as e:
         print(f"Error during tokenization: {e}")
         raise HTTPException(status_code=500, detail=str(e))
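
For context only: the new summarize_large_text helper relies on names that do not appear in this hunk (app, summarizer, SummarizeRequest) and are presumably defined earlier in main.py. The sketch below shows one plausible version of that setup, assuming a FastAPI app and a Hugging Face transformers summarization pipeline; the model name is an assumption, not taken from this commit.

# Sketch of the surrounding setup assumed by this diff; not part of the commit.
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from transformers import pipeline

app = FastAPI()

# Assumed summarization pipeline; the model name is a placeholder.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

class SummarizeRequest(BaseModel):
    text: str

With that setup, the updated endpoint returns a single combined summary instead of the previous list of chunks; against a locally running server (port assumed), a client could call it with requests.post("http://localhost:8000/summarize", json={"text": long_text}).json()["summary"].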