import type { ChatMessage } from "gpt-tokenizer/GptEncoding";
import {
  getQuery,
  getTextGenerationState,
  updateResponse,
  updateTextGenerationState,
} from "./pubSub";
import { getSearchTokenHash } from "./searchTokenHash";
import { getSystemPrompt } from "./systemPrompt";
import {
  ChatGenerationError,
  canStartResponding,
  getDefaultChatCompletionCreateParamsStreaming,
  getFormattedSearchResults,
} from "./textGenerationUtilities";

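/**
 * Generates an answer to the current search query through the internal
 * inference endpoint, streaming partial output into the shared response
 * state as it arrives.
 */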
export async function generateTextWithInternalApi() {
  await canStartResponding();
  updateTextGenerationState("preparingToGenerate");

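  // The system prompt, carrying the formatted search results, is sent as a
  // user turn and acknowledged with a canned assistant reply before the
  // actual query.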
  const messages: ChatMessage[] = [
    {
      role: "user",
      content: getSystemPrompt(getFormattedSearchResults(true)),
    },
    { role: "assistant", content: "Ok!" },
    { role: "user", content: getQuery() },
  ];

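  // Stream the completion, pushing each partial message into the UI state
  // and honoring interruptions.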
  const streamedMessage = await processStreamResponse(messages, (message) => {
    if (getTextGenerationState() === "interrupted") {
      throw new ChatGenerationError("Generation interrupted");
    }

    if (getTextGenerationState() !== "generating") {
      updateTextGenerationState("generating");
    }

    updateResponse(message);
  });

  updateResponse(streamedMessage);
}

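/**
 * Runs a follow-up chat conversation through the internal inference
 * endpoint, reporting each partial response via the onUpdate callback.
 */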
export async function generateChatWithInternalApi(
  messages: ChatMessage[],
  onUpdate: (partialResponse: string) => void,
) {
  return processStreamResponse(messages, (message) => {
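    // Deliver each partial response, then abort if the user interrupted.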
    onUpdate(message);
    if (getTextGenerationState() === "interrupted") {
      throw new ChatGenerationError("Chat generation interrupted");
    }
  });
}

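/**
 * POSTs the messages to the /inference endpoint and consumes the resulting
 * OpenAI-style server-sent-event stream, calling onChunk with the
 * accumulated message after each delta. Resolves with the full message.
 */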
async function processStreamResponse(
  messages: ChatMessage[],
  onChunk: (message: string) => void,
): Promise<string> {
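  // The inference endpoint is same-origin and authenticated with a bearer
  // token derived from the search token hash.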
  const inferenceUrl = new URL("/inference", self.location.origin);
  const tokenPrefix = "Bearer ";
  const token = await getSearchTokenHash();

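  // Request a streaming chat completion using the app's default parameters.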
  const response = await fetch(inferenceUrl.toString(), {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `${tokenPrefix}${token}`,
    },
    body: JSON.stringify({
      ...getDefaultChatCompletionCreateParamsStreaming(),
      messages,
    }),
  });

  if (!response.ok || !response.body) {
    throw new Error(
      `Inference request failed or returned no body (status ${response.status})`,
    );
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder("utf-8");
  let streamedMessage = "";
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    // Decode in streaming mode so multi-byte characters split across
    // network chunks are reassembled correctly.
    buffer += decoder.decode(value, { stream: true });

    // Events are newline-delimited; keep any trailing partial line in the
    // buffer until the rest of it arrives with the next chunk.
    const lines = buffer.split("\n");
    buffer = lines.pop() ?? "";

    const parsedLines = lines
      .map((line) => line.replace(/^data: /, "").trim())
      .filter((line) => line !== "" && line !== "[DONE]")
      .map((line) => JSON.parse(line));

    for (const parsedLine of parsedLines) {
      const deltaContent = parsedLine.choices[0]?.delta?.content;
      if (deltaContent) {
        streamedMessage += deltaContent;
        onChunk(streamedMessage);
      }
    }
  }

  return streamedMessage;
}