matt HOFFNER committed on
Commit
054d282
1 Parent(s): 25cad06
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ node_modules/
2
+ .next/
Dockerfile CHANGED
@@ -1,24 +1,62 @@
1
- FROM nvidia/cuda:11.6.0-base-ubuntu20.04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  WORKDIR /app
3
- RUN export PATH="/usr/local/cuda/bin:$PATH"
4
- RUN apt update && \
5
- apt install --no-install-recommends -y build-essential python3 python3-pip wget && \
6
- apt clean && rm -rf /var/lib/apt/lists/*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
 
 
8
 
9
- RUN wget -qO- "https://cmake.org/files/v3.17/cmake-3.17.0-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local
10
- RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
11
- COPY requirements.txt ./
12
- RUN pip install --upgrade pip && \
13
- pip install -r requirements.txt
14
 
15
- EXPOSE 7860
 
 
 
16
 
17
- RUN useradd -m -u 1000 user
18
- USER user
19
 
20
- COPY --chown=user . $HOME/app
21
 
22
- RUN ls -al
23
 
24
- CMD ["python3", "app.py", "--host", "127.0.0.1", "--port", "7860"]
 
1
+ FROM node:18 AS base
2
+
3
+ # Install dependencies only when needed
4
+ FROM base AS deps
5
+
6
+ WORKDIR /app
7
+
8
+ # Install dependencies based on the preferred package manager
9
+ COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
10
+ RUN \
11
+ if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
12
+ elif [ -f package-lock.json ]; then npm ci; \
13
+ elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i --frozen-lockfile; \
14
+ else echo "Lockfile not found." && exit 1; \
15
+ fi
16
+
17
+ # Uncomment the following lines if you want to use a secret at buildtime,
18
+ # for example to access your private npm packages
19
+ # RUN --mount=type=secret,id=HF_EXAMPLE_SECRET,mode=0444,required=true \
20
+ # $(cat /run/secrets/HF_EXAMPLE_SECRET)
21
+
22
+ # Rebuild the source code only when needed
23
+ FROM base AS builder
24
  WORKDIR /app
25
+ COPY --from=deps /app/node_modules ./node_modules
26
+ COPY . .
27
+
28
+ # Next.js collects completely anonymous telemetry data about general usage.
29
+ # Learn more here: https://nextjs.org/telemetry
30
+ # Uncomment the following line in case you want to disable telemetry during the build.
31
+ # ENV NEXT_TELEMETRY_DISABLED 1
32
+
33
+ # RUN yarn build
34
+
35
+ # If you use yarn, comment out this line and use the line above
36
+ RUN npm run build
37
+
38
+ # Production image, copy all the files and run next
39
+ FROM base AS runner
40
+ WORKDIR /app
41
+
42
+ ENV NODE_ENV production
43
+ # Uncomment the following line in case you want to disable telemetry during runtime.
44
+ # ENV NEXT_TELEMETRY_DISABLED 1
45
 
46
+ RUN addgroup --system --gid 1001 nodejs
47
+ RUN adduser --system --uid 1001 nextjs
48
 
49
+ COPY --from=builder /app/public ./public
 
 
 
 
50
 
51
+ # Automatically leverage output traces to reduce image size
52
+ # https://nextjs.org/docs/advanced-features/output-file-tracing
53
+ COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
54
+ COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static
55
 
56
+ USER nextjs
 
57
 
58
+ EXPOSE 3000
59
 
60
+ ENV PORT 3000
61
 
62
+ CMD ["node", "server.js"]
README.md CHANGED
@@ -1,12 +1,50 @@
1
  ---
2
- title: serp-chat
3
- emoji: 📚🎳
4
- colorFrom: red
5
- colorTo: green
6
  sdk: docker
7
- app_file: app.py
8
- port: 7860
9
  pinned: false
10
  ---
11
 
12
- # llamacpp-cuda-doc-chat
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: new google
3
+ emoji: 📚
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
  sdk: docker
7
+ app_port: 3000
 
8
  pinned: false
9
  ---
10
 
11
+ This is a fork of https://github.com/sahandghavidel/google-clone-nextjs13
12
+
13
+ This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).
14
+
15
+ ## Getting Started
16
+
17
+ First, run the development server:
18
+
19
+ ```bash
20
+ npm run dev
21
+ # or
22
+ yarn dev
23
+ # or
24
+ pnpm dev
25
+ ```
26
+
27
+ Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
28
+
29
+ You can start editing the page by modifying `app/page.js`. The page auto-updates as you edit the file.
30
+
31
+ [API routes](https://nextjs.org/docs/api-routes/introduction) can be accessed on [http://localhost:3000/api/hello](http://localhost:3000/api/hello). This endpoint can be edited in `pages/api/hello.js`.
32
+
33
+ The `pages/api` directory is mapped to `/api/*`. Files in this directory are treated as [API routes](https://nextjs.org/docs/api-routes/introduction) instead of React pages.
34
+
35
+ This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.
36
+
37
+ ## Learn More
38
+
39
+ To learn more about Next.js, take a look at the following resources:
40
+
41
+ - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
42
+ - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
43
+
44
+ You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome!
45
+
46
+ ## Deploy on Vercel
47
+
48
+ The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
49
+
50
+ Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details.
app.py DELETED
@@ -1,98 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- import sys
3
- import gradio as gr
4
-
5
- from llm import *
6
- from utils import *
7
- from presets import *
8
- from overwrites import *
9
-
10
- logging.basicConfig(stream=sys.stdout, level=logging.INFO)
11
- logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
12
-
13
- PromptHelper.compact_text_chunks = compact_text_chunks
14
-
15
- with gr.Blocks() as demo:
16
- with gr.Box():
17
- gr.Markdown("<h1 style='font-size: 48px; text-align: center;'>🪄 WizardLM Doc Chat 📓</h1>")
18
-
19
- chat_context = gr.State([])
20
- new_google_chat_context = gr.State([])
21
- with gr.Row():
22
- with gr.Column(scale=3):
23
- with gr.Box():
24
- gr.Markdown("**Indicies**")
25
- with gr.Row():
26
- with gr.Column(scale=12):
27
- index_select = gr.Dropdown(choices=refresh_json_list(plain=True), value="index", show_label=False, multiselect=False).style(container=False)
28
- with gr.Column(min_width=30, scale=1):
29
- index_refresh_btn = gr.Button("🔄").style()
30
-
31
- with gr.Tab("Search"):
32
- with gr.Row():
33
- with gr.Column(scale=1):
34
- chat_tone = gr.Radio(["smart", "concise", "creative"], label="chat_tone", type="index", value="concise")
35
- with gr.Column(scale=3):
36
- search_options_checkbox = gr.CheckboxGroup(label="Todo: APIs", choices=["📚 Google", "🏡 Porch", "Your API Here"])
37
- chatbot = gr.Chatbot()
38
- with gr.Row():
39
- with gr.Column(min_width=50, scale=1):
40
- chat_empty_btn = gr.Button("🧹", variant="secondary")
41
- with gr.Column(scale=12):
42
- chat_input = gr.Textbox(show_label=False, placeholder="Enter text...").style(container=False)
43
- with gr.Column(min_width=50, scale=1):
44
- chat_submit_btn = gr.Button("🚀", variant="primary")
45
-
46
-
47
- with gr.Tab("Setting"):
48
- with gr.Row():
49
- sim_k = gr.Slider(1, 10, 3, step=1, label="similarity_topk", interactive=True, show_label=True)
50
- tempurature = gr.Slider(0, 2, 0.5, step=0.1, label="tempurature", interactive=True, show_label=True)
51
- with gr.Row():
52
- with gr.Column():
53
- tmpl_select = gr.Radio(list(prompt_tmpl_dict.keys()), value="Default", label="Prompt", interactive=True)
54
- prompt_tmpl = gr.Textbox(value=prompt_tmpl_dict["Default"] ,lines=10, max_lines=40 ,show_label=False)
55
- with gr.Column():
56
- refine_select = gr.Radio(list(refine_tmpl_dict.keys()), value="Default", label="Refine", interactive=True)
57
- refine_tmpl = gr.Textbox(value=refine_tmpl_dict["Default"] ,lines=10, max_lines=40 ,show_label=False)
58
-
59
-
60
- with gr.Tab("Upload"):
61
- with gr.Row():
62
- with gr.Column():
63
- index_type = gr.Dropdown(choices=["GPTVectorStoreIndex"], label="index_type", value="GPTVectorStoreIndex")
64
- upload_file = gr.Files(label="upload_file .txt, .pdf, .epub")
65
- new_index_name = gr.Textbox(placeholder="new_index_name: ", show_label=False).style(container=False)
66
- construct_btn = gr.Button("⚒️ Index", variant="primary")
67
- with gr.Row():
68
- with gr.Column():
69
- with gr.Row():
70
- max_input_size = gr.Slider(256, 4096, 4096, step=1, label="max_input_size", interactive=True, show_label=True)
71
- num_outputs = gr.Slider(256, 4096, 512, step=1, label="num_outputs", interactive=True, show_label=True)
72
- with gr.Row():
73
- max_chunk_overlap = gr.Slider(0, 100, 20, step=1, label="max_chunk_overlap", interactive=True, show_label=True)
74
- chunk_size_limit = gr.Slider(0, 4096, 0, step=1, label="chunk_size_limit", interactive=True, show_label=True)
75
- with gr.Row():
76
- embedding_limit = gr.Slider(0, 100, 0, step=1, label="embedding_limit", interactive=True, show_label=True)
77
- separator = gr.Textbox(show_label=False, label="separator", placeholder=",", value="", interactive=True)
78
- with gr.Row():
79
- num_children = gr.Slider(2, 100, 10, step=1, label="num_children", interactive=False, show_label=True)
80
- max_keywords_per_chunk = gr.Slider(1, 100, 10, step=1, label="max_keywords_per_chunk", interactive=False, show_label=True)
81
-
82
-
83
- chat_input.submit(chat_ai, [index_select, chat_input, prompt_tmpl, refine_tmpl, sim_k, chat_tone, chat_context, chatbot, search_options_checkbox], [chat_context, chatbot])
84
- chat_input.submit(reset_textbox, [], [chat_input])
85
- chat_submit_btn.click(chat_ai, [index_select, chat_input, prompt_tmpl, refine_tmpl, sim_k, chat_tone, chat_context, chatbot, search_options_checkbox], [chat_context, chatbot])
86
- chat_submit_btn.click(reset_textbox, [], [chat_input])
87
- chat_empty_btn.click(lambda: ([], []), None, [chat_context, chatbot])
88
-
89
- tmpl_select.change(change_prompt_tmpl, [tmpl_select], [prompt_tmpl])
90
- refine_select.change(change_refine_tmpl, [refine_select], [refine_tmpl])
91
-
92
- index_type.change(lock_params, [index_type], [num_children, max_keywords_per_chunk])
93
- construct_btn.click(construct_index, [upload_file, new_index_name, index_type, max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit, embedding_limit, separator, num_children], [index_select])
94
-
95
-
96
- if __name__ == "__main__":
97
- demo.title = "WizardLM Doc Chat"
98
- demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index/docstore.json DELETED
@@ -1 +0,0 @@
1
- {"docstore/metadata": {"8ced199c-f95b-4e3f-b257-dada8d54a55d": {"doc_hash": "46058eadd904a825aebbe7837bc99d1470311336dff046005edcbb02ad36a2e0"}, "ec3daf8e-bae2-44d6-8c70-97e5e74ebbb7": {"doc_hash": "56ceb6ae86fb3887dc0939a1f5c4dbfd994304186b817ba021de140315d0c52d"}, "9f1a3e4e-6fe6-4580-85ed-a6c15856c7a2": {"doc_hash": "25aa44f21ebc8354f28021c3daefdc80847660c61d488ce0e7e9892e35444685"}, "6691ff87-c5c6-4e59-b9c6-3257f47b77e1": {"doc_hash": "019db476096b4a9fbc014d71c9634f5d01ad3bf6e51d093ba7f5bcb3f1a6d7a1"}, "c4b8f302-807e-4714-81a6-b0b6cc5a5f55": {"doc_hash": "37e82baffd9bcb78e6fda3645adaf5b647a502b3e8c75f4f9d5fad3adc56e24e"}, "0867d7c0-321a-4248-8d1a-e38083fb8533": {"doc_hash": "c01640a9026941025384be8c763709f29955f54deb80b1f319f3673fff646b39"}, "31d64cd1-61ca-4aa3-b592-4dbdaf74ed8d": {"doc_hash": "86b2375c5e598a26712f2a748d483df9f458f192009b37d41b65e3300a156964"}, "b8b3c21d-2a62-49e6-a62f-53b87cb90f0f": {"doc_hash": "08b5ab8c522e5b8adfc2b738a155ca85335d1f2c2ede00a967f69406c54b37b3"}, "cb4b7053-efcb-4b62-bef8-d401726a49a3": {"doc_hash": "a1adc57df1046c4bab52af9f1ee767ec009c3a41b6d8b4d642ee6099d109e957"}, "65050326-fdf0-421d-8135-003d0277153a": {"doc_hash": "e042c399f010602b6ea30ddd6e02a6e2d2221a35c4f3bc67027169349762c36f"}}, "docstore/data": {"ec3daf8e-bae2-44d6-8c70-97e5e74ebbb7": {"__data__": {"text": "Bitcoin: A Peer-to-Peer Electronic Cash System\n\nSatoshi Nakamoto\nsatoshin@gmx.com\nwww.bitcoin.org\n\nAbstract. A purely peer-to-peer version of electronic cash would allow online \npayments to be sent directly from one party to another without going through a \nfinancial institution. Digital signatures provide part of the solution, but the main \nbenefits are lost if a trusted third party is still required to prevent double-spending. \nWe propose a solution to the double-spending problem using a peer-to-peer network. 
\nThe network timestamps transactions by hashing them into an ongoing chain of \nhash-based proof-of-work, forming a record that cannot be changed without redoing \nthe proof-of-work. The longest chain not only serves as proof of the sequence of \nevents witnessed, but proof that it came from the largest pool of CPU power. As \nlong as a majority of CPU power is controlled by nodes that are not cooperating to \nattack the network, they'll generate the longest chain and outpace attackers. The \nnetwork itself requires minimal structure. Messages are broadcast on a best effort \nbasis, and nodes can leave and rejoin the network at will, accepting the longest \nproof-of-work chain as proof of what happened while they were gone.\n\n1.\n\nIntroduction\n\nCommerce on the Internet has come to rely almost exclusively on financial institutions serving as \ntrusted third parties to process electronic payments. While the system works well enough for \nmost transactions, it still suffers from the inherent weaknesses of the trust based model. \nCompletely non-reversible transactions are not really possible, since financial institutions cannot \navoid mediating disputes. The cost of mediation increases transaction costs, limiting the \nminimum practical transaction size and cutting off the possibility for small casual transactions, \nand there is a broader cost in the loss of ability to make non-reversible payments for non-\nreversible services. With the possibility of reversal, the need for trust spreads. Merchants must \nbe wary of their customers, hassling them for more information than they would otherwise need. \nA certain percentage of fraud is accepted as unavoidable. 
These costs and payment uncertainties \ncan be avoided in person by using physical currency, but no mechanism exists to make payments \nover a communications channel without a trusted party.\n\nWhat is needed is an electronic payment system based on cryptographic proof instead of trust, \nallowing any two willing parties to transact directly with each other without the need for a trusted \nthird party. Transactions that are computationally impractical to reverse would protect sellers \nfrom fraud, and routine escrow mechanisms could easily be implemented to protect buyers. In \nthis paper, we propose a solution to the double-spending problem using a peer-to-peer distributed \ntimestamp server to generate computational proof of the chronological order of transactions. The \nsystem is secure as long as honest nodes collectively control more CPU power than any \ncooperating group of attacker nodes.\n\n1\n\n\f\n2. Transactions\n\nWe define an electronic coin as a chain of digital signatures. Each owner transfers the coin to the \nnext by digitally signing a hash of the previous transaction and the public key of the next owner \nand adding these to the end of the coin. A payee can verify the signatures to verify the chain of \nownership.\n\nTransaction\n\nTransaction\n\nTransaction\n\nOwner 1's\nPublic Key\n\nOwner 2's\nPublic Key\n\nOwner", "doc_id": "ec3daf8e-bae2-44d6-8c70-97e5e74ebbb7", "embedding": null, "doc_hash": "56ceb6ae86fb3887dc0939a1f5c4dbfd994304186b817ba021de140315d0c52d", "extra_info": null, "node_info": {"start": 0, "end": 3659}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "3": "9f1a3e4e-6fe6-4580-85ed-a6c15856c7a2"}}, "__type__": "1"}, "9f1a3e4e-6fe6-4580-85ed-a6c15856c7a2": {"__data__": {"text": "distributed \ntimestamp server to generate computational proof of the chronological order of transactions. 
The \nsystem is secure as long as honest nodes collectively control more CPU power than any \ncooperating group of attacker nodes.\n\n1\n\n\f\n2. Transactions\n\nWe define an electronic coin as a chain of digital signatures. Each owner transfers the coin to the \nnext by digitally signing a hash of the previous transaction and the public key of the next owner \nand adding these to the end of the coin. A payee can verify the signatures to verify the chain of \nownership.\n\nTransaction\n\nTransaction\n\nTransaction\n\nOwner 1's\nPublic Key\n\nOwner 2's\nPublic Key\n\nOwner 3's\nPublic Key\n\nHash\n\nHash\n\nHash\n\nOwner 0's\nSignature\n\n Verify\n\nS ig n \n\nOwner 1's\nSignature\n\n Verify\n\nS ig n \n\nOwner 2's\nSignature\n\nOwner 1's\nPrivate Key\n\nOwner 2's\nPrivate Key\n\nOwner 3's\nPrivate Key\n\nThe problem of course is the payee can't verify that one of the owners did not double-spend \nthe coin. A common solution is to introduce a trusted central authority, or mint, that checks every \ntransaction for double spending. After each transaction, the coin must be returned to the mint to \nissue a new coin, and only coins issued directly from the mint are trusted not to be double-spent. \nThe problem with this solution is that the fate of the entire money system depends on the \ncompany running the mint, with every transaction having to go through them, just like a bank.\n\nWe need a way for the payee to know that the previous owners did not sign any earlier \ntransactions. For our purposes, the earliest transaction is the one that counts, so we don't care \nabout later attempts to double-spend. The only way to confirm the absence of a transaction is to \nbe aware of all transactions. In the mint based model, the mint was aware of all transactions and \ndecided which arrived first. 
To accomplish this without a trusted party, transactions must be \npublicly announced [1], and we need a system for participants to agree on a single history of the \norder in which they were received. The payee needs proof that at the time of each transaction, the \nmajority of nodes agreed it was the first received. \n\n3. Timestamp Server\n\nThe solution we propose begins with a timestamp server. A timestamp server works by taking a \nhash of a block of items to be timestamped and widely publishing the hash, such as in a \nnewspaper or Usenet post [2-5]. The timestamp proves that the data must have existed at the \ntime, obviously, in order to get into the hash. Each timestamp includes the previous timestamp in \nits hash, forming a chain, with each additional timestamp reinforcing the ones before it.\n\nHash\n\nHash\n\nBlock\n\nBlock\n\nItem\n\nItem\n\n...\n\nItem\n\nItem\n\n...\n\n2\n\n\f\n4. Proof-of-Work\n\nTo implement a distributed timestamp server on a peer-to-peer basis, we will need to use a proof-\nof-work system similar to Adam Back's Hashcash [6], rather than newspaper or Usenet posts. \nThe proof-of-work involves scanning for a value that when hashed, such as with SHA-256, the \nhash begins with a number of zero bits. 
The average work required is exponential in the number \nof zero bits required and can be verified by executing a single hash.\n\nFor our timestamp network, we implement the proof-of-work by incrementing a nonce in the \nblock until a value is found that gives the block's hash the required zero bits.", "doc_id": "9f1a3e4e-6fe6-4580-85ed-a6c15856c7a2", "embedding": null, "doc_hash": "25aa44f21ebc8354f28021c3daefdc80847660c61d488ce0e7e9892e35444685", "extra_info": null, "node_info": {"start": 3097, "end": 6575}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "ec3daf8e-bae2-44d6-8c70-97e5e74ebbb7", "3": "6691ff87-c5c6-4e59-b9c6-3257f47b77e1"}}, "__type__": "1"}, "6691ff87-c5c6-4e59-b9c6-3257f47b77e1": {"__data__": {"text": "Proof-of-Work\n\nTo implement a distributed timestamp server on a peer-to-peer basis, we will need to use a proof-\nof-work system similar to Adam Back's Hashcash [6], rather than newspaper or Usenet posts. \nThe proof-of-work involves scanning for a value that when hashed, such as with SHA-256, the \nhash begins with a number of zero bits. The average work required is exponential in the number \nof zero bits required and can be verified by executing a single hash.\n\nFor our timestamp network, we implement the proof-of-work by incrementing a nonce in the \nblock until a value is found that gives the block's hash the required zero bits. Once the CPU \neffort has been expended to make it satisfy the proof-of-work, the block cannot be changed \nwithout redoing the work. As later blocks are chained after it, the work to change the block \nwould include redoing all the blocks after it.\n\nBlock\n\nBlock\n\nPrev Hash\n\nNonce\n\nPrev Hash\n\nNonce\n\nTx\n\nTx\n\n...\n\nTx\n\nTx\n\n...\n\nThe proof-of-work also solves the problem of determining representation in majority decision \nmaking. If the majority were based on one-IP-address-one-vote, it could be subverted by anyone \nable to allocate many IPs. 
Proof-of-work is essentially one-CPU-one-vote. The majority \ndecision is represented by the longest chain, which has the greatest proof-of-work effort invested \nin it. If a majority of CPU power is controlled by honest nodes, the honest chain will grow the \nfastest and outpace any competing chains. To modify a past block, an attacker would have to \nredo the proof-of-work of the block and all blocks after it and then catch up with and surpass the \nwork of the honest nodes. We will show later that the probability of a slower attacker catching up \ndiminishes exponentially as subsequent blocks are added.\n\nTo compensate for increasing hardware speed and varying interest in running nodes over time, \nthe proof-of-work difficulty is determined by a moving average targeting an average number of \nblocks per hour. If they're generated too fast, the difficulty increases.\n\n5. Network\n\nThe steps to run the network are as follows:\n\n1) New transactions are broadcast to all nodes.\n2) Each node collects new transactions into a block. \n3) Each node works on finding a difficult proof-of-work for its block.\n4) When a node finds a proof-of-work, it broadcasts the block to all nodes.\n5) Nodes accept the block only if all transactions in it are valid and not already spent.\n6) Nodes express their acceptance of the block by working on creating the next block in the \n\nchain, using the hash of the accepted block as the previous hash.\n\nNodes always consider the longest chain to be the correct one and will keep working on \nextending it. If two nodes broadcast different versions of the next block simultaneously, some \nnodes may receive one or the other first. In that case, they work on the first one they received, \nbut save the other branch in case it becomes longer. 
The tie will be broken when the next proof-\nof-work is found and one branch becomes longer; the nodes that were working on the other \nbranch will then switch to the longer one.\n\n3\n\n\f\nNew transaction broadcasts do not necessarily need to reach all nodes. As long as they reach \nmany nodes, they will get into a block before long. Block broadcasts are also tolerant of dropped \nmessages. If a node does not receive a block, it will request it when it receives the next block and \nrealizes it missed one.\n\n6.\n\nIncentive\n\nBy convention, the first transaction in a block is a special transaction that starts a new coin owned \nby the creator of the block. This adds an incentive for", "doc_id": "6691ff87-c5c6-4e59-b9c6-3257f47b77e1", "embedding": null, "doc_hash": "019db476096b4a9fbc014d71c9634f5d01ad3bf6e51d093ba7f5bcb3f1a6d7a1", "extra_info": null, "node_info": {"start": 6606, "end": 10280}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "9f1a3e4e-6fe6-4580-85ed-a6c15856c7a2", "3": "c4b8f302-807e-4714-81a6-b0b6cc5a5f55"}}, "__type__": "1"}, "c4b8f302-807e-4714-81a6-b0b6cc5a5f55": {"__data__": {"text": "save the other branch in case it becomes longer. The tie will be broken when the next proof-\nof-work is found and one branch becomes longer; the nodes that were working on the other \nbranch will then switch to the longer one.\n\n3\n\n\f\nNew transaction broadcasts do not necessarily need to reach all nodes. As long as they reach \nmany nodes, they will get into a block before long. Block broadcasts are also tolerant of dropped \nmessages. If a node does not receive a block, it will request it when it receives the next block and \nrealizes it missed one.\n\n6.\n\nIncentive\n\nBy convention, the first transaction in a block is a special transaction that starts a new coin owned \nby the creator of the block. 
This adds an incentive for nodes to support the network, and provides \na way to initially distribute coins into circulation, since there is no central authority to issue them. \nThe steady addition of a constant of amount of new coins is analogous to gold miners expending \nresources to add gold to circulation. In our case, it is CPU time and electricity that is expended.\n\nThe incentive can also be funded with transaction fees. If the output value of a transaction is \nless than its input value, the difference is a transaction fee that is added to the incentive value of \nthe block containing the transaction. Once a predetermined number of coins have entered \ncirculation, the incentive can transition entirely to transaction fees and be completely inflation \nfree.\n\nThe incentive may help encourage nodes to stay honest. If a greedy attacker is able to \nassemble more CPU power than all the honest nodes, he would have to choose between using it \nto defraud people by stealing back his payments, or using it to generate new coins. He ought to \nfind it more profitable to play by the rules, such rules that favour him with more new coins than \neveryone else combined, than to undermine the system and the validity of his own wealth.\n\n7. Reclaiming Disk Space\n\nOnce the latest transaction in a coin is buried under enough blocks, the spent transactions before \nit can be discarded to save disk space. To facilitate this without breaking the block's hash, \ntransactions are hashed in a Merkle Tree [7][2][5], with only the root included in the block's hash. \nOld blocks can then be compacted by stubbing off branches of the tree. 
The interior hashes do \nnot need to be stored.\n\nBlock\n\nBlock Header (Block Hash)\n\nBlock\n\nBlock Header (Block Hash)\n\nPrev Hash\n\nNonce\n\nPrev Hash\n\nNonce\n\nRoot Hash\n\nRoot Hash\n\nHash01\n\nHash23\n\nHash01\n\nHash23\n\nHash0\n\nHash1\n\nHash2\n\nHash3\n\nHash2\n\nHash3\n\nTx0\n\nTx1\n\nTx2\n\nTx3\n\nTx3\n\nTransactions Hashed in a Merkle Tree\n\nAfter Pruning Tx0-2 from the Block\n\nA block header with no transactions would be about 80 bytes. If we suppose blocks are \ngenerated every 10 minutes, 80 bytes * 6 * 24 * 365 = 4.2MB per year. With computer systems \ntypically selling with 2GB of RAM as of 2008, and Moore's Law predicting current growth of \n1.2GB per year, storage should not be a problem even if the block headers must be kept in \nmemory.\n\n4\n\n\f\n8.\n\nSimplified Payment Verification\n\nIt is possible to verify payments without running a full network node. A user only needs to keep \na copy of the block headers of the longest proof-of-work chain, which he can get by querying \nnetwork nodes until he's convinced he has the longest chain, and obtain", "doc_id": "c4b8f302-807e-4714-81a6-b0b6cc5a5f55", "embedding": null, "doc_hash": "37e82baffd9bcb78e6fda3645adaf5b647a502b3e8c75f4f9d5fad3adc56e24e", "extra_info": null, "node_info": {"start": 10213, "end": 13672}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "6691ff87-c5c6-4e59-b9c6-3257f47b77e1", "3": "0867d7c0-321a-4248-8d1a-e38083fb8533"}}, "__type__": "1"}, "0867d7c0-321a-4248-8d1a-e38083fb8533": {"__data__": {"text": "would be about 80 bytes. If we suppose blocks are \ngenerated every 10 minutes, 80 bytes * 6 * 24 * 365 = 4.2MB per year. 
With computer systems \ntypically selling with 2GB of RAM as of 2008, and Moore's Law predicting current growth of \n1.2GB per year, storage should not be a problem even if the block headers must be kept in \nmemory.\n\n4\n\n\f\n8.\n\nSimplified Payment Verification\n\nIt is possible to verify payments without running a full network node. A user only needs to keep \na copy of the block headers of the longest proof-of-work chain, which he can get by querying \nnetwork nodes until he's convinced he has the longest chain, and obtain the Merkle branch \nlinking the transaction to the block it's timestamped in. He can't check the transaction for \nhimself, but by linking it to a place in the chain, he can see that a network node has accepted it, \nand blocks added after it further confirm the network has accepted it.\n\nLongest Proof-of-Work Chain\n\nBlock Header\n\nBlock Header\n\nBlock Header\n\nPrev Hash\n\nNonce\n\nPrev Hash\n\nNonce\n\nPrev Hash\n\nNonce\n\nMerkle Root\n\nMerkle Root\n\nMerkle Root\n\nHash01\n\nHash23\n\nMerkle Branch for Tx3\n\nHash2\n\nHash3\n\nTx3\n\nAs such, the verification is reliable as long as honest nodes control the network, but is more \nvulnerable if the network is overpowered by an attacker. While network nodes can verify \ntransactions for themselves, the simplified method can be fooled by an attacker's fabricated \ntransactions for as long as the attacker can continue to overpower the network. One strategy to \nprotect against this would be to accept alerts from network nodes when they detect an invalid \nblock, prompting the user's software to download the full block and alerted transactions to \nconfirm the inconsistency. Businesses that receive frequent payments will probably still want to \nrun their own nodes for more independent security and quicker verification.\n\n9. 
Combining and Splitting Value\n\nAlthough it would be possible to handle coins individually, it would be unwieldy to make a \nseparate transaction for every cent in a transfer. To allow value to be split and combined, \ntransactions contain multiple inputs and outputs. Normally there will be either a single input \nfrom a larger previous transaction or multiple inputs combining smaller amounts, and at most two \noutputs: one for the payment, and one returning the change, if any, back to the sender. \n\nTransaction\n\nOut\n\n...\n\nIn\n\nIn\n\n...\n\nIt should be noted that fan-out, where a transaction depends on several transactions, and those \ntransactions depend on many more, is not a problem here. There is never the need to extract a \ncomplete standalone copy of a transaction's history.\n\n5\n\n\f\n10. Privacy\n\nThe traditional banking model achieves a level of privacy by limiting access to information to the \nparties involved and the trusted third party. The necessity to announce all transactions publicly \nprecludes this method, but privacy can still be maintained by breaking the flow of information in \nanother place: by keeping public keys anonymous. The public can see that someone is sending \nan amount to someone else, but without information linking the transaction to anyone. This is \nsimilar to the", "doc_id": "0867d7c0-321a-4248-8d1a-e38083fb8533", "embedding": null, "doc_hash": "c01640a9026941025384be8c763709f29955f54deb80b1f319f3673fff646b39", "extra_info": null, "node_info": {"start": 13744, "end": 17120}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "c4b8f302-807e-4714-81a6-b0b6cc5a5f55", "3": "31d64cd1-61ca-4aa3-b592-4dbdaf74ed8d"}}, "__type__": "1"}, "31d64cd1-61ca-4aa3-b592-4dbdaf74ed8d": {"__data__": {"text": "should be noted that fan-out, where a transaction depends on several transactions, and those \ntransactions depend on many more, is not a problem here. 
There is never the need to extract a \ncomplete standalone copy of a transaction's history.\n\n5\n\n\f\n10. Privacy\n\nThe traditional banking model achieves a level of privacy by limiting access to information to the \nparties involved and the trusted third party. The necessity to announce all transactions publicly \nprecludes this method, but privacy can still be maintained by breaking the flow of information in \nanother place: by keeping public keys anonymous. The public can see that someone is sending \nan amount to someone else, but without information linking the transaction to anyone. This is \nsimilar to the level of information released by stock exchanges, where the time and size of \nindividual trades, the \"tape\", is made public, but without telling who the parties were.\n\nTraditional Privacy Model\n\nIdentities\n\nTransactions\n\nTrusted\nThird Party\n\nCounterparty\n\nPublic\n\nNew Privacy Model\n\nIdentities\n\nTransactions\n\nPublic\n\nAs an additional firewall, a new key pair should be used for each transaction to keep them \nfrom being linked to a common owner. Some linking is still unavoidable with multi-input \ntransactions, which necessarily reveal that their inputs were owned by the same owner. The risk \nis that if the owner of a key is revealed, linking could reveal other transactions that belonged to \nthe same owner.\n\n11. Calculations\n\nWe consider the scenario of an attacker trying to generate an alternate chain faster than the honest \nchain. Even if this is accomplished, it does not throw the system open to arbitrary changes, such \nas creating value out of thin air or taking money that never belonged to the attacker. Nodes are \nnot going to accept an invalid transaction as payment, and honest nodes will never accept a block \ncontaining them. 
An attacker can only try to change one of his own transactions to take back \nmoney he recently spent.\n\nThe race between the honest chain and an attacker chain can be characterized as a Binomial \nRandom Walk. The success event is the honest chain being extended by one block, increasing its \nlead by +1, and the failure event is the attacker's chain being extended by one block, reducing the \ngap by -1.\n\nThe probability of an attacker catching up from a given deficit is analogous to a Gambler's \nRuin problem. Suppose a gambler with unlimited credit starts at a deficit and plays potentially an \ninfinite number of trials to try to reach breakeven. We can calculate the probability he ever \nreaches breakeven, or that an attacker ever catches up with the honest chain, as follows [8]:\n\np = probability an honest node finds the next block\nq = probability the attacker finds the next block\nqz = probability the attacker will ever catch up from z blocks behind\n\nq z={ 1\n\n\ue09eq / p\ue09fz\n\nif p\u2264q\n\nif p\ue085q}\n\n6\n\n\f\nGiven our assumption that p > q, the probability drops exponentially as the number of blocks the \nattacker has to catch up with increases. With the odds against him, if he doesn't make a lucky \nlunge forward early on, his chances become vanishingly small as he falls further behind.\n\nWe now consider how long the recipient of a new transaction needs to wait before being \nsufficiently certain the sender can't change the transaction. We assume the sender is an attacker \nwho wants to make the recipient believe he paid him for a while, then switch it to pay back to \nhimself after some time has passed. 
The receiver will be alerted when that happens, but the \nsender hopes it will be too late.\n\nThe receiver generates a new key pair and gives the", "doc_id": "31d64cd1-61ca-4aa3-b592-4dbdaf74ed8d", "embedding": null, "doc_hash": "86b2375c5e598a26712f2a748d483df9f458f192009b37d41b65e3300a156964", "extra_info": null, "node_info": {"start": 17007, "end": 20730}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "0867d7c0-321a-4248-8d1a-e38083fb8533", "3": "b8b3c21d-2a62-49e6-a62f-53b87cb90f0f"}}, "__type__": "1"}, "b8b3c21d-2a62-49e6-a62f-53b87cb90f0f": {"__data__": {"text": "our assumption that p > q, the probability drops exponentially as the number of blocks the \nattacker has to catch up with increases. With the odds against him, if he doesn't make a lucky \nlunge forward early on, his chances become vanishingly small as he falls further behind.\n\nWe now consider how long the recipient of a new transaction needs to wait before being \nsufficiently certain the sender can't change the transaction. We assume the sender is an attacker \nwho wants to make the recipient believe he paid him for a while, then switch it to pay back to \nhimself after some time has passed. The receiver will be alerted when that happens, but the \nsender hopes it will be too late.\n\nThe receiver generates a new key pair and gives the public key to the sender shortly before \nsigning. This prevents the sender from preparing a chain of blocks ahead of time by working on \nit continuously until he is lucky enough to get far enough ahead, then executing the transaction at \nthat moment. Once the transaction is sent, the dishonest sender starts working in secret on a \nparallel chain containing an alternate version of his transaction.\n\nThe recipient waits until the transaction has been added to a block and z blocks have been \nlinked after it. 
He doesn't know the exact amount of progress the attacker has made, but \nassuming the honest blocks took the average expected time per block, the attacker's potential \nprogress will be a Poisson distribution with expected value:\n\n\ue0c1=z\n\nq\np\n\nTo get the probability the attacker could still catch up now, we multiply the Poisson density for \neach amount of progress he could have made by the probability he could catch up from that point:\n\n\u221e \ue0c1k e\u2212\ue0c1\n\u2211\nk !\n\nk =0\n\n\u22c5{\ue09eq / p\ue09f\ue09e z\u2212k \ue09f\n\n1\n\nif k \u2264 z\n\nif k \ue085 z}\n\nRearranging to avoid summing the infinite tail of the distribution...\n\n1\u2212\u2211\n\nz \ue0c1k e\u2212\ue0c1\nk!\n\nk =0\n\n\ue09e1\u2212\ue09eq / p\ue09f\ue09e z\u2212 k \ue09f\ue09f\n\nConverting to C code...\n\n#include <math.h>\ndouble AttackerSuccessProbability(double q, int z)\n{\n double p = 1.0 - q;\n double lambda = z * (q / p);\n double sum = 1.0;\n int i, k;\n for (k = 0; k <= z; k++)\n {\n double poisson = exp(-lambda);\n for (i = 1; i <= k; i++)\n poisson *= lambda / i;\n sum -= poisson * (1 - pow(q / p, z - k));\n }\n return sum;\n}\n\n7\n\n\f\nRunning some results, we can see the probability drop off exponentially with z.\n\nq=0.1\nz=0 P=1.0000000\nz=1 P=0.2045873\nz=2 P=0.0509779\nz=3 P=0.0131722\nz=4 P=0.0034552\nz=5 P=0.0009137\nz=6 P=0.0002428\nz=7 P=0.0000647\nz=8 P=0.0000173\nz=9 P=0.0000046\nz=10 P=0.0000012\n\nq=0.3\nz=0 P=1.0000000\nz=5 P=0.1773523\nz=10 ", "doc_id": "b8b3c21d-2a62-49e6-a62f-53b87cb90f0f", "embedding": null, "doc_hash": "08b5ab8c522e5b8adfc2b738a155ca85335d1f2c2ede00a967f69406c54b37b3", "extra_info": null, "node_info": {"start": 20764, "end": 23431}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "31d64cd1-61ca-4aa3-b592-4dbdaf74ed8d", "3": "cb4b7053-efcb-4b62-bef8-d401726a49a3"}}, "__type__": "1"}, "cb4b7053-efcb-4b62-bef8-d401726a49a3": {"__data__": {"text": "off exponentially with z.\n\nq=0.1\nz=0 
P=1.0000000\nz=1 P=0.2045873\nz=2 P=0.0509779\nz=3 P=0.0131722\nz=4 P=0.0034552\nz=5 P=0.0009137\nz=6 P=0.0002428\nz=7 P=0.0000647\nz=8 P=0.0000173\nz=9 P=0.0000046\nz=10 P=0.0000012\n\nq=0.3\nz=0 P=1.0000000\nz=5 P=0.1773523\nz=10 P=0.0416605\nz=15 P=0.0101008\nz=20 P=0.0024804\nz=25 P=0.0006132\nz=30 P=0.0001522\nz=35 P=0.0000379\nz=40 P=0.0000095\nz=45 P=0.0000024\nz=50 P=0.0000006\n\nSolving for P less than 0.1%...\n\nP < 0.001\nq=0.10 z=5\nq=0.15 z=8\nq=0.20 z=11\nq=0.25 z=15\nq=0.30 z=24\nq=0.35 z=41\nq=0.40 z=89\nq=0.45 z=340\n\n12. Conclusion\n\nWe have proposed a system for electronic transactions without relying on trust. We started with \nthe usual framework of coins made from digital signatures, which provides strong control of \nownership, but is incomplete without a way to prevent double-spending. To solve this, we \nproposed a peer-to-peer network using proof-of-work to record a public history of transactions \nthat quickly becomes computationally impractical for an attacker to change if honest nodes \ncontrol a majority of CPU power. The network is robust in its unstructured simplicity. Nodes \nwork all at once with little coordination. They do not need to be identified, since messages are \nnot routed to any particular place and only need to be delivered on a best effort basis. Nodes can \nleave and rejoin the network at will, accepting the proof-of-work chain as proof of what \nhappened while they were gone. They vote with their CPU power, expressing their acceptance of \nvalid blocks by working on extending them and rejecting invalid blocks by refusing to work on \nthem. Any needed rules and incentives can be enforced with this consensus mechanism.\n\n8\n\n\f\nReferences\n\n[1] W. Dai, \"b-money,\" http://www.weidai.com/bmoney.txt, 1998.\n\n[2] H. Massias, X.S. Avila, and J.-J. Quisquater, \"Design of a secure timestamping service with minimal \n\ntrust requirements,\" In 20th Symposium on Information Theory in the Benelux, May 1999.\n\n[3] S. 
Haber, W.S. Stornetta, \"How to time-stamp a digital document,\" In Journal of Cryptology, vol 3, no \n\n2, pages 99-111, 1991.\n\n[4] D. Bayer, S. Haber, W.S. Stornetta, \"Improving the efficiency and reliability of digital time-stamping,\" \nIn Sequences II: Methods in Communication, Security and Computer Science, pages 329-334, 1993.\n\n[5] S. Haber, W.S. Stornetta, \"Secure names for bit-strings,\" In Proceedings of the", "doc_id": "cb4b7053-efcb-4b62-bef8-d401726a49a3", "embedding": null, "doc_hash": "a1adc57df1046c4bab52af9f1ee767ec009c3a41b6d8b4d642ee6099d109e957", "extra_info": null, "node_info": {"start": 23806, "end": 26344}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "b8b3c21d-2a62-49e6-a62f-53b87cb90f0f", "3": "65050326-fdf0-421d-8135-003d0277153a"}}, "__type__": "1"}, "65050326-fdf0-421d-8135-003d0277153a": {"__data__": {"text": "Massias, X.S. Avila, and J.-J. Quisquater, \"Design of a secure timestamping service with minimal \n\ntrust requirements,\" In 20th Symposium on Information Theory in the Benelux, May 1999.\n\n[3] S. Haber, W.S. Stornetta, \"How to time-stamp a digital document,\" In Journal of Cryptology, vol 3, no \n\n2, pages 99-111, 1991.\n\n[4] D. Bayer, S. Haber, W.S. Stornetta, \"Improving the efficiency and reliability of digital time-stamping,\" \nIn Sequences II: Methods in Communication, Security and Computer Science, pages 329-334, 1993.\n\n[5] S. Haber, W.S. Stornetta, \"Secure names for bit-strings,\" In Proceedings of the 4th ACM Conference \n\non Computer and Communications Security, pages 28-35, April 1997.\n\n[6] A. Back, \"Hashcash - a denial of service counter-measure,\" \nhttp://www.hashcash.org/papers/hashcash.pdf, 2002.\n\n[7] R.C. Merkle, \"Protocols for public key cryptosystems,\" In Proc. 1980 Symposium on Security and \n\nPrivacy, IEEE Computer Society, pages 122-133, April 1980.\n\n[8] W. 
Feller, \"An introduction to probability theory and its applications,\" 1957.\n\n9\n\n\f", "doc_id": "65050326-fdf0-421d-8135-003d0277153a", "embedding": null, "doc_hash": "e042c399f010602b6ea30ddd6e02a6e2d2221a35c4f3bc67027169349762c36f", "extra_info": null, "node_info": {"start": 25973, "end": 27035}, "relationships": {"1": "8ced199c-f95b-4e3f-b257-dada8d54a55d", "2": "cb4b7053-efcb-4b62-bef8-d401726a49a3"}}, "__type__": "1"}}}
 
 
index/index_store.json DELETED
@@ -1 +0,0 @@
1
- {"index_store/data": {"7f82bee6-1192-4ff9-9732-1eaadb062490": {"__type__": "list", "__data__": {"index_id": "7f82bee6-1192-4ff9-9732-1eaadb062490", "summary": null, "nodes": ["ec3daf8e-bae2-44d6-8c70-97e5e74ebbb7", "9f1a3e4e-6fe6-4580-85ed-a6c15856c7a2", "6691ff87-c5c6-4e59-b9c6-3257f47b77e1", "c4b8f302-807e-4714-81a6-b0b6cc5a5f55", "0867d7c0-321a-4248-8d1a-e38083fb8533", "31d64cd1-61ca-4aa3-b592-4dbdaf74ed8d", "b8b3c21d-2a62-49e6-a62f-53b87cb90f0f", "cb4b7053-efcb-4b62-bef8-d401726a49a3", "65050326-fdf0-421d-8135-003d0277153a"]}}}}
 
 
index/vector_store.json DELETED
@@ -1 +0,0 @@
1
- {"embedding_dict": {}, "text_id_to_doc_id": {}}
 
 
jsconfig.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "baseUrl": ".",
4
+ "paths": {
5
+ "@/*": ["./src/*"]
6
+ }
7
+ }
8
+ }
llm.py DELETED
@@ -1,276 +0,0 @@
1
- import os
2
- from langchain.llms import LlamaCpp
3
- from llama_index import (
4
- GPTVectorStoreIndex,
5
- GPTListIndex,
6
- ServiceContext,
7
- ResponseSynthesizer,
8
- LangchainEmbedding
9
- )
10
- from langchain.embeddings import HuggingFaceEmbeddings
11
- from llama_index import download_loader, StorageContext, load_index_from_storage
12
- from llama_index import (
13
- Document,
14
- LLMPredictor,
15
- PromptHelper
16
- )
17
- from llama_index.indices.postprocessor import SimilarityPostprocessor
18
- from llama_index.query_engine import RetrieverQueryEngine
19
- from llama_index.storage.index_store import SimpleIndexStore
20
- from llama_index.storage.docstore import SimpleDocumentStore
21
- from llama_index.storage.storage_context import SimpleVectorStore
22
-
23
- from googlesearch import search as google_search
24
-
25
- from utils import *
26
-
27
- import logging
28
- import argparse
29
-
30
- model_path = "wizardLM-7B.ggmlv3.q4_0.bin"
31
-
32
-
33
def query_llm(index, prompt, service_context, retriever_mode='embedding', response_mode='compact'):
    """Run ``prompt`` through a retriever-backed query engine built on ``index``.

    Args:
        index: Index object exposing ``as_retriever``.
        prompt: Query text handed to the engine.
        service_context: Service context forwarded to the synthesizer,
            the retriever and the query engine.
        retriever_mode: Forwarded to ``index.as_retriever``.
        response_mode: Forwarded to ``RetrieverQueryEngine.from_args``.

    Returns:
        Whatever ``RetrieverQueryEngine.query`` returns for ``prompt``.
    """
    # Retrieved nodes pass through a similarity post-processor (cutoff 0.7).
    postprocessors = [SimilarityPostprocessor(similarity_cutoff=0.7)]
    synthesizer = ResponseSynthesizer.from_args(
        service_context=service_context,
        node_postprocessors=postprocessors,
    )
    node_retriever = index.as_retriever(
        retriever_mode=retriever_mode,
        service_context=service_context,
    )
    engine = RetrieverQueryEngine.from_args(
        node_retriever,
        response_synthesizer=synthesizer,
        response_mode=response_mode,
        service_context=service_context,
    )
    return engine.query(prompt)
43
-
44
-
45
def get_documents(file_src):
    """Load every entry of ``file_src`` into one flat list of documents.

    Args:
        file_src: Iterable of sources. A ``str`` entry containing ``"http"``
            is fetched with the ``BeautifulSoupWebReader`` loader. Any
            non-string entry must expose a ``name`` attribute holding a
            filesystem path: ``.pdf`` paths are read with ``CJKPDFReader``,
            everything else is read as UTF-8 text, passed through
            ``add_space`` and wrapped in a ``Document``.

    Returns:
        list: The loaded documents, in source order. String entries that
        are not URLs are skipped (a warning is logged) rather than loaded.
    """
    documents = []
    logging.debug("Loading documents...")
    print(f"file_src: {file_src}")
    for file in file_src:
        if isinstance(file, str):
            print(f"file: {file}")
            if "http" in file:
                logging.debug("Loading web page...")
                BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
                loader = BeautifulSoupWebReader()
                documents += loader.load_data([file])
            else:
                # Previously dropped without a trace; keep skipping, but say so.
                logging.warning(
                    "Skipping source %r: string sources must be http(s) URLs", file
                )
            continue

        # Non-string entries are file-like objects identified by their path.
        logging.debug(f"file: {file.name}")
        if os.path.splitext(file.name)[1] == ".pdf":
            logging.debug("Loading PDF...")
            CJKPDFReader = download_loader("CJKPDFReader")
            loader = CJKPDFReader()
            documents += loader.load_data(file=file.name)
        else:
            logging.debug("Loading text file...")
            with open(file.name, "r", encoding="utf-8") as f:
                text = add_space(f.read())
                documents += [Document(text)]
    return documents
70
-
71
-
72
def construct_index(
    file_src,
    index_name,
    index_type,
    max_input_size=2048,
    num_outputs=2048,
    max_chunk_overlap=20,
    chunk_size_limit=None,
    embedding_limit=None,
    separator=" ",
    num_children=10,
    max_keywords_per_chunk=10
):
    """Build an index from ``file_src`` and persist it to ``./index``.

    Args:
        file_src: Sources handed to ``get_documents``.
        index_name: Value selected in the returned dropdown update.
        index_type: ``"_GPTVectorStoreIndex"`` builds a vector store index;
            any other value builds a list index.
        max_input_size, num_outputs, max_chunk_overlap: Positional
            ``PromptHelper`` arguments.
        chunk_size_limit, embedding_limit: ``0`` is treated as ``None``.
        separator: ``""`` is treated as a single space.
        num_children, max_keywords_per_chunk: Accepted but not used here.

    Returns:
        ``gr.Dropdown.update(...)`` on success, ``None`` if building or
        persisting the index raised (the exception is printed).
    """
    # Normalise the "unset" spellings coming from the caller.
    if chunk_size_limit == 0:
        chunk_size_limit = None
    if embedding_limit == 0:
        embedding_limit = None
    if separator == "":
        separator = " "

    model = LlamaCpp(
        model_path=model_path,
        n_ctx=2048,
        use_mlock=True,
        n_parts=-1,
        temperature=0.7,
        top_p=0.40,
        last_n_tokens_size=100,
        n_threads=8,
        f16_kv=True,
        max_tokens=150
    )
    predictor = LLMPredictor(llm=model)
    helper = PromptHelper(
        max_input_size,
        num_outputs,
        max_chunk_overlap,
        embedding_limit,
        chunk_size_limit,
        separator=separator,
    )
    service_context = ServiceContext.from_defaults(
        llm_predictor=predictor,
        prompt_helper=helper,
    )
    documents = get_documents(file_src)

    try:
        # NOTE(review): search_construct passes "GPTVectorStoreIndex" (no
        # leading underscore), which lands in the list-index branch -
        # confirm which spelling is intended.
        index_cls = (
            GPTVectorStoreIndex
            if index_type == "_GPTVectorStoreIndex"
            else GPTListIndex
        )
        index = index_cls.from_documents(documents, service_context=service_context)
        index.storage_context.persist(persist_dir="./index")
    except Exception as e:
        print(e)
        return None

    # NOTE(review): `gr` and `refresh_json_list` are not imported by name in
    # this file; presumably they arrive via `from utils import *` - verify.
    choices = refresh_json_list(plain=True)
    return gr.Dropdown.update(choices=choices, value=index_name)
128
-
129
-
130
def chat_ai(
    index_select,
    question,
    prompt_tmpl,
    refine_tmpl,
    sim_k,
    chat_tone,
    context,
    chatbot,
    search_mode=None,
):
    """Answer ``question`` and append the exchange to the chat history.

    Args:
        index_select: Name of the index to query; replaced by the result of
            ``search_construct`` when ``search_mode`` is non-empty.
        question: The user's question.
        prompt_tmpl, refine_tmpl, sim_k: Forwarded to ``ask_ai``.
        chat_tone: ``0`` maps to temperature 2, ``1`` to 1, anything else
            to 0.5.
        context: List of ``{"role", "content"}`` dicts; mutated in place.
        chatbot: List of ``(question, answer)`` tuples; mutated in place.
        search_mode: Search engines to use; ``None`` means no search.

    Returns:
        tuple: ``(context, chatbot)`` after the new exchange was appended.
    """
    # A fresh list per call instead of a shared mutable default.
    if search_mode is None:
        search_mode = []

    if index_select == "search" and search_mode == []:
        chatbot.append((question, "❗search"))
        return context, chatbot

    logging.info(f"Question: {question}")

    temperature = 2 if chat_tone == 0 else 1 if chat_tone == 1 else 0.5
    if search_mode:
        index_select = search_construct(question, search_mode, index_select)
    logging.debug(f"Index: {index_select}")
    response = ask_ai(
        index_select,
        question,
        prompt_tmpl,
        refine_tmpl,
        sim_k,
        temperature,
        context
    )
    print(response)

    if response is None:
        response = "Please upload a document first"
    response = parse_text(response)

    context.append({"role": "user", "content": question})
    context.append({"role": "assistant", "content": response})
    chatbot.append((question, response))

    return context, chatbot
171
-
172
-
173
def ask_ai(
    index_select,
    question,
    prompt_tmpl,
    refine_tmpl,
    sim_k=1,
    temprature=0,
    prefix_messages=None
):
    """Answer ``question`` from the index persisted under ``./index``.

    Args:
        index_select, prompt_tmpl, refine_tmpl, sim_k, prefix_messages:
            Accepted for interface compatibility; not used by this function.
        question: Query text passed to ``query_llm``.
        temprature: Sampling temperature for the ``LlamaCpp`` model (the
            parameter keeps its historical spelling for keyword callers).

    Returns:
        The ``response`` attribute of the query result, or ``None`` when
        the query produced no result.
    """
    logging.debug("Querying index...")
    # NOTE(review): the helper is sized for 4096 tokens while the model below
    # is created with n_ctx=2048 - confirm the mismatch is intended.
    prompt_helper = PromptHelper(
        4096,
        150,
        -20000
    )
    llm = LlamaCpp(model_path=model_path,
        n_ctx=2048,
        use_mlock=True,
        n_parts=-1,
        temperature=temprature,
        top_p=0.40,
        last_n_tokens_size=100,
        n_threads=4,
        f16_kv=True,
        max_tokens=200,
        n_gpu_layers=40
    )
    embeddings = HuggingFaceEmbeddings(model_kwargs={"device": "cuda"})
    embed_model = LangchainEmbedding(embeddings)
    llm_predictor = LLMPredictor(
        llm=llm
    )
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model, prompt_helper=prompt_helper)
    logging.debug("Using GPTVectorStoreIndex")
    storage_context = StorageContext.from_defaults(
        docstore=SimpleDocumentStore.from_persist_dir(persist_dir="./index"),
        vector_store=SimpleVectorStore.from_persist_dir(persist_dir="./index"),
        index_store=SimpleIndexStore.from_persist_dir(persist_dir="./index"),
    )
    index = load_index_from_storage(service_context=service_context, storage_context=storage_context)
    response = query_llm(index, question, service_context)

    if response is None:
        logging.debug("No response found, returning None")
        return None

    logging.info(f"Response: {response}")
    return response.response
224
-
225
-
226
def search_construct(question, search_mode, index_select):
    """Search the web for ``question`` and index the pages that were found.

    The local model first condenses the question into comma-separated search
    terms; each term is then looked up with every engine in ``search_mode``
    ("Google" and/or "Manual", matched by substring).

    Args:
        question: The user's question.
        search_mode: Iterable of search-engine names.
        index_select: Index name returned unchanged when no link is found.

    Returns:
        str: ``index_select`` when no links were collected, otherwise the
        search-term based index name suffixed with ``"_GPTVectorStoreIndex"``.
    """
    import itertools  # local import: only this function needs it

    print(f"You asked: {question}")
    llm = LlamaCpp(model_path=model_path,
        n_ctx=400,
        use_mlock=True,
        n_parts=-1,
        temperature=1,
        top_p=0.40,
        last_n_tokens_size=100,
        n_threads=6,
        f16_kv=True,
        max_tokens=100
    )
    search_terms = (
        llm.generate(
            [
                f"Please extract search terms from the user’s question. The search terms is a concise sentence, which will be searched on Google to obtain relevant information to answer the user’s question, too generalized search terms doesn’t help. Please provide no more than two search terms. Please provide the most relevant search terms only, the search terms should directly correspond to the user’s question. Please separate different search items with commas, with no quote marks. The user’s question is: {question}"
            ]
        )
        .generations[0][0]
        .text.strip()
    )
    search_terms = search_terms.replace('"', "")
    search_terms = search_terms.replace(".", "")
    links = []
    for keywords in search_terms.split(","):
        keywords = keywords.strip()
        for search_engine in search_mode:
            if "Google" in search_engine:
                print(f"Googling: {keywords}")
                search_iter = google_search(keywords, num_results=5)
                # Take at most 10 hits; islice stops cleanly when the search
                # yields fewer instead of letting StopIteration escape.
                links += list(itertools.islice(search_iter, 10))
            if "Manual" in search_engine:
                print(f"Searching manually: {keywords}")
                print("Please input links manually. (Enter 'q' to quit.)")
                while True:
                    link = input("Enter link:\n")
                    if link == "q":
                        break
                    else:
                        links.append(link)
    # De-duplicate while keeping first-seen order so indexing is deterministic.
    links = list(dict.fromkeys(links))
    if len(links) == 0:
        return index_select
    print("Extracting data from links...")
    print("\n".join(links))
    search_index_name = " ".join(search_terms.split(","))
    construct_index(links, search_index_name, "GPTVectorStoreIndex")
    print(f"Index {search_index_name} constructed.")
    return search_index_name + "_GPTVectorStoreIndex"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
next.config.js ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
/** @type {import('next').NextConfig} */
const nextConfig = {
  // Opt in to the `app/` directory router, which this project's
  // `src/app/**` pages and layouts rely on.
  experimental: {
    appDir: true,
  },
  // Remote hosts `next/image` is allowed to load from; the home page logo
  // is served from upload.wikimedia.org.
  images: {
    domains: ["upload.wikimedia.org"],
  },
};

module.exports = nextConfig;
overwrites.py DELETED
@@ -1,12 +0,0 @@
1
- from llama_index import Prompt
2
- from typing import List
3
- import logging
4
-
5
def compact_text_chunks(self, prompt: Prompt, text_chunks: List[str]) -> List[str]:
    """Merge ``text_chunks`` into one numbered text and split it again.

    Blank chunks are dropped, the remaining ones are stripped and prefixed
    with ``[1]``, ``[2]``, ... so answers can cite them, then joined with
    blank lines and re-split by the prompt-aware text splitter.

    Args:
        prompt: Prompt handed to ``get_text_splitter_given_prompt``.
        text_chunks: Raw text chunks to compact.

    Returns:
        The chunks produced by the splitter's ``split_text``.
    """
    logging.debug("Compacting text chunks...🚀🚀🚀")
    numbered = []
    for chunk in text_chunks:
        stripped = chunk.strip()
        if stripped:
            numbered.append(f"[{len(numbered) + 1}] {stripped}")
    merged = "\n\n".join(numbered)
    # resplit based on self.max_chunk_overlap
    splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
    return splitter.split_text(merged)
 
 
 
 
 
 
 
 
 
 
 
 
 
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "google-clone-nextjs13",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "next dev",
7
+ "build": "next build",
8
+ "start": "next start",
9
+ "lint": "next lint"
10
+ },
11
+ "dependencies": {
12
+ "@next/font": "13.1.6",
13
+ "eslint": "8.34.0",
14
+ "eslint-config-next": "13.1.6",
15
+ "html-react-parser": "^3.0.9",
16
+ "next": "13.1.6",
17
+ "react": "18.2.0",
18
+ "react-dom": "18.2.0",
19
+ "react-icons": "^4.7.1"
20
+ },
21
+ "devDependencies": {
22
+ "autoprefixer": "^10.4.13",
23
+ "postcss": "^8.4.21",
24
+ "tailwindcss": "^3.2.7"
25
+ }
26
+ }
postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
// PostCSS pipeline: run Tailwind first, then autoprefixer, both with their
// default options.
module.exports = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}
presets.py DELETED
@@ -1,29 +0,0 @@
1
# Question-answering prompt templates keyed by preset name. Each template
# expects the placeholders {context_str} and {query_str}.
prompt_tmpl_dict = {
    "Default":
"""\
Context information is below.
---------------------
{context_str}
---------------------
Using the provided context information, write a comprehensive reply to the given query.
Make sure to cite results using [number] notation after the reference.
If the provided context information refer to multiple subjects with the same name, write separate answers for each subject.
Use prior knowledge only if the given context didn't provide enough information.
Answer the question: {query_str}
"""
}
15
-
16
# Answer-refinement prompt templates keyed by preset name. Each template
# expects the placeholders {query_str}, {existing_answer} and {context_msg}.
refine_tmpl_dict = {
    "Default":
"""\
The original question is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the question.
If the context isn't useful, return the original answer.
"""
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
public/favicon.svg ADDED
public/spinner.svg ADDED
requirements.txt DELETED
@@ -1,10 +0,0 @@
1
- gradio
2
- llama_index
3
- langchain
4
- googlesearch-python
5
- llama-cpp-python
6
- IPython
7
- transformers
8
- sentence-transformers
9
- --extra-index-url https://download.pytorch.org/whl/cu113
10
- torch
 
 
 
 
 
 
 
 
 
 
 
src/app/globals.css ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @tailwind base;
2
+ @tailwind components;
3
+ @tailwind utilities;
4
+
5
/* Reusable component classes built from Tailwind utilities. */
@layer components {
  /* Fixed-size button with hover ring and shadow. */
  .btn {
    @apply bg-[#f8f9fa] rounded-md text-sm text-gray-800 hover:ring-1 hover:ring-gray-200 focus:outline-none active:ring-gray-300 hover:shadow-md w-36 h-10 transition-shadow;
  }

  /* Clickable text that underlines on hover. */
  .link {
    @apply hover:underline cursor-pointer;
  }

  /* Round icon button with a grey hover background. */
  .header-icon {
    @apply bg-transparent hover:bg-gray-200 p-2 text-4xl rounded-full cursor-pointer;
  }
}
src/app/head.jsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ export default function Head() {
2
+ return (
3
+ <>
4
+ <title>Google Clone Next js 13</title>
5
+ <meta content="width=device-width, initial-scale=1" name="viewport" />
6
+ <meta name="description" content="Google clone created by Next js 13" />
7
+ <link rel="icon" href="/favicon.svg" />
8
+ </>
9
+ )
10
+ }
src/app/layout.jsx ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Footer from "@/components/Footer";
2
+ import "./globals.css";
3
+
4
+ export default function RootLayout({ children }) {
5
+ return (
6
+ <html lang="en">
7
+ {/*
8
+ <head /> will contain the components returned by the nearest parent
9
+ head.js. Find out more at https://beta.nextjs.org/docs/api-reference/file-conventions/head
10
+ */}
11
+ <body className="relative min-h-screen">
12
+ {children}
13
+ {/* Footer */}
14
+
15
+ <Footer />
16
+ </body>
17
+ </html>
18
+ );
19
+ }
src/app/page.jsx ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import HomeHeader from "@/components/HomeHeader";
2
+ import HomeSearch from "@/components/HomeSearch";
3
+ import Image from "next/image";
4
+
5
+ export default function Home() {
6
+ return (
7
+ <>
8
+ {/* Header */}
9
+ <HomeHeader />
10
+
11
+ {/* body */}
12
+
13
+ <div className="flex flex-col items-center mt-24">
14
+ <Image
15
+ width="300"
16
+ height="100"
17
+ src="https://upload.wikimedia.org/wikinews/en/archive/0/0c/20050620003808%21Google_logo_png.png"
18
+ />
19
+
20
+ <HomeSearch/>
21
+ </div>
22
+ </>
23
+ );
24
+ }
src/app/search/error.jsx ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useEffect } from "react";
4
+
5
+ export default function Error({ error, reset }) {
6
+ useEffect(() => {
7
+ console.log("Error: ", error);
8
+ }, [error]);
9
+ return (
10
+ <div className="flex flex-col justify-center items-center pt-10">
11
+ <h1 className="text-3xl mb-4">Something went wrong</h1>
12
+ <button className="text-blue-500" onClick={() => reset()}>Try again</button>
13
+ </div>
14
+ );
15
+ }
src/app/search/image/loading.jsx ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export default function loading() {
2
+ return (
3
+ <div className="pt-10 mx-2 lg:pl-52 max-w-6xl flex sm:space-x-4 flex-col sm:flex-row pb-42">
4
+ <div className="animate-pulse">
5
+ <div className="h-48 w-48 mb-4 bg-gray-200 rounded-md"></div>
6
+ <div className="h-2 w-48 mb-2.5 bg-gray-200 rounded-md"></div>
7
+ <div className="h-2 w-44 mb-2.5 bg-gray-200 rounded-md"></div>
8
+ </div>
9
+ <div className="hidden sm:inline-flex sm:space-x-4">
10
+ <div className="animate-pulse">
11
+ <div className="h-48 w-48 mb-4 bg-gray-200 rounded-md"></div>
12
+ <div className="h-2 w-48 mb-2.5 bg-gray-200 rounded-md"></div>
13
+ <div className="h-2 w-44 mb-2.5 bg-gray-200 rounded-md"></div>
14
+ </div>
15
+ <div className="animate-pulse">
16
+ <div className="h-48 w-48 mb-4 bg-gray-200 rounded-md"></div>
17
+ <div className="h-2 w-48 mb-2.5 bg-gray-200 rounded-md"></div>
18
+ <div className="h-2 w-44 mb-2.5 bg-gray-200 rounded-md"></div>
19
+ </div>
20
+ </div>
21
+ </div>
22
+ );
23
+ }
src/app/search/image/page.jsx ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export const dynamic = "force-dynamic";
2
+
3
+ import ImageSearchResults from "@/components/ImageSearchResults";
4
+ import Link from "next/link";
5
+
6
+ export default async function ImageSearchPage({ searchParams }) {
7
+ const startIndex = searchParams.start || "1";
8
+ await new Promise((resolve) => setTimeout(resolve, 2000));
9
+ const response = await fetch(
10
+ `https://www.googleapis.com/customsearch/v1?key=${process.env.API_KEY}&cx=${process.env.CONTEXT_KEY}&q=${searchParams.searchTerm}}&searchType=image&start=${startIndex}`
11
+ );
12
+
13
+ if (!response.ok) {
14
+ console.log(response);
15
+ throw new Error("Something went wrong");
16
+ }
17
+
18
+ const data = await response.json();
19
+
20
+ const results = data.items;
21
+
22
+ if (!results) {
23
+ return (
24
+ <div className="flex flex-col justify-center items-center pt-10">
25
+ <h1 className="text-3xl mb-4">No results found</h1>
26
+ <p className="text-lg">
27
+ Try searching for something else or go back to the homepage{" "}
28
+ <Link href="/" className="text-blue-500">
29
+ Home
30
+ </Link>
31
+ </p>
32
+ </div>
33
+ );
34
+ }
35
+ return <>{results && <ImageSearchResults results={data} />}</>;
36
+ }
src/app/search/layout.jsx ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import SearchHeader from "@/components/SearchHeader";
2
+ import "./../globals.css";
3
+
4
+ export default function SearchLayout({ children }) {
5
+ return (
6
+ <div>
7
+ <SearchHeader />
8
+ {children}
9
+ {/* Footer */}
10
+ </div>
11
+ );
12
+ }
src/app/search/web/loading.jsx ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export default function loading() {
2
+ return (
3
+ <>
4
+ <div className="mx-2 pt-10 max-w-6xl lg:pl-52 animate-pulse">
5
+ <div className="h-2.5 w-48 bg-gray-200 rounded-full mb-2.5"></div>
6
+ <div className="h-3.5 max-w-[360px] bg-gray-200 rounded-full mb-2.5"></div>
7
+ <div className="h-2 max-w-[560px] bg-gray-200 rounded-full mb-2.5"></div>
8
+ <div className="h-2 max-w-[530px] bg-gray-200 rounded-full mb-2.5"></div>
9
+ </div>
10
+ <div className="mx-2 pt-10 max-w-6xl lg:pl-52 animate-pulse">
11
+ <div className="h-2.5 w-48 bg-gray-200 rounded-full mb-2.5"></div>
12
+ <div className="h-3.5 max-w-[360px] bg-gray-200 rounded-full mb-2.5"></div>
13
+ <div className="h-2 max-w-[560px] bg-gray-200 rounded-full mb-2.5"></div>
14
+ <div className="h-2 max-w-[530px] bg-gray-200 rounded-full mb-2.5"></div>
15
+ </div>
16
+ <div className="mx-2 pt-10 max-w-6xl lg:pl-52 animate-pulse">
17
+ <div className="h-2.5 w-48 bg-gray-200 rounded-full mb-2.5"></div>
18
+ <div className="h-3.5 max-w-[360px] bg-gray-200 rounded-full mb-2.5"></div>
19
+ <div className="h-2 max-w-[560px] bg-gray-200 rounded-full mb-2.5"></div>
20
+ <div className="h-2 max-w-[530px] bg-gray-200 rounded-full mb-2.5"></div>
21
+ </div>
22
+ </>
23
+ )
24
+ }
src/app/search/web/page.jsx ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export const dynamic = "force-dynamic";
2
+
3
+ import WebSearchResults from "@/components/WebSearchResults";
4
+ import Link from "next/link";
5
+
6
+ export default async function WebSearchPage({ searchParams }) {
7
+ const startIndex = searchParams.start || "1";
8
+ await new Promise((resolve) => setTimeout(resolve, 2000));
9
+ const response = await fetch(
10
+ `https://www.googleapis.com/customsearch/v1?key=${process.env.API_KEY}&cx=${process.env.CONTEXT_KEY}&q=${searchParams.searchTerm}}&start=${startIndex}`
11
+ );
12
+
13
+ if (!response.ok) {
14
+ console.log(response);
15
+ throw new Error("Something went wrong");
16
+ }
17
+
18
+ const data = await response.json();
19
+
20
+ const results = data.items;
21
+
22
+ if (!results) {
23
+ return (
24
+ <div className="flex flex-col justify-center items-center pt-10">
25
+ <h1 className="text-3xl mb-4">No results found</h1>
26
+ <p className="text-lg">
27
+ Try searching for something else or go back to the homepage{" "}
28
+ <Link href="/" className="text-blue-500">
29
+ Home
30
+ </Link>
31
+ </p>
32
+ </div>
33
+ );
34
+ }
35
+ return <>{results && <WebSearchResults results={data} />}</>;
36
+ }
src/components/CountryLookup.jsx ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useEffect, useState } from "react";
4
+
5
+ export default function CountryLookup() {
6
+ const [country, setCountry] = useState("United States");
7
+ useEffect(() => {
8
+ fetch(
9
+ `https://extreme-ip-lookup.com/json/?key=${process.env.NEXT_PUBLIC_IP_API_KEY}`
10
+ )
11
+ .then((res) => res.json())
12
+ .then((data) => setCountry(data.country));
13
+ }, []);
14
+ return <div>{country}</div>;
15
+ }
src/components/Footer.jsx ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import CountryLookup from "./CountryLookup";
2
+
3
+ export default function Footer() {
4
+ return (
5
+ <footer className="absolute bottom-0 text-sm text-gray-500 bg-[#f2f2f2] w-full">
6
+
7
+ </footer>
8
+ );
9
+ }
src/components/HomeHeader.jsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import Link from "next/link";
2
+ import { TbGridDots } from "react-icons/tb";
3
+
4
+ export default function HomeHeader() {
5
+ return (
6
+ <header className="flex justify-end p-5 text-sm">
7
+
8
+ </header>
9
+ );
10
+ }
src/components/HomeSearch.jsx ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useState } from "react";
4
+ import { useRouter } from "next/navigation";
5
+ import { AiOutlineSearch } from "react-icons/ai";
6
+ import { BsFillMicFill } from "react-icons/bs";
7
+ export default function HomeSearch() {
8
+ const router = useRouter();
9
+ const [input, setInput] = useState("");
10
+ const [randomSearchLoading, setRandomSearchLoading] = useState(false);
11
+ function handleSubmit(e) {
12
+ e.preventDefault();
13
+ if (!input.trim()) return;
14
+ router.push(`/search/web?searchTerm=${input}`);
15
+ }
16
+ async function randomSearch() {
17
+ setRandomSearchLoading(true);
18
+ const response = await fetch("https://random-word-api.herokuapp.com/word")
19
+ .then((res) => res.json())
20
+ .then((data) => data[0]);
21
+ if (!response) return;
22
+ router.push(`/search/web?searchTerm=${response}`);
23
+ setRandomSearchLoading(false);
24
+ }
25
+ return (
26
+ <>
27
+ <form
28
+ onSubmit={handleSubmit}
29
+ className="flex w-full mt-5 mx-auto max-w-[90%] border border-gray-200 px-5 py-3 rounded-full hover:shadow-md focus-within:shadow-md transition-shadow sm:max-w-xl lg:max-w-2xl"
30
+ >
31
+ <AiOutlineSearch className="text-xl text-gray-500 mr-3" />
32
+ <input
33
+ type="text"
34
+ className="flex-grow focus:outline-none"
35
+ onChange={(e) => setInput(e.target.value)}
36
+ value={input}
37
+ />
38
+ </form>
39
+
40
+ <div className="flex flex-col space-y-2 sm:space-y-0 sm:space-x-4 justify-center sm:flex-row mt-8 ">
41
+ <button onClick={handleSubmit} className="btn">
42
+ Google Search
43
+ </button>
44
+ <button
45
+ disabled={randomSearchLoading}
46
+ onClick={randomSearch}
47
+ className="btn flex items-center justify-center disabled:opacity-80"
48
+ >
49
+ {randomSearchLoading ? (
50
+ <img
51
+ src="spinner.svg"
52
+ alt="loading..."
53
+ className="h-6 text-center"
54
+ />
55
+ ) : (
56
+ "I'm feeling lucky"
57
+ )}
58
+ </button>
59
+ </div>
60
+ </>
61
+ );
62
+ }
src/components/ImageSearchResults.jsx ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Link from "next/link";
2
+ import PaginationButtons from "./PaginationButtons";
3
+
4
+ export default function ImageSearchResults({ results }) {
5
+ return (
6
+ <div className="sm:pb-24 pb-40 mt-4">
7
+ <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 px-3 space-x-4">
8
+ {results.items.map((result) => (
9
+ <div key={result.link} className="mb-8">
10
+ <div className="group">
11
+ <Link href={result.image.contextLink}>
12
+ <img
13
+ src={result.link}
14
+ alt={result.title}
15
+ className="h-60 group-hover:shadow-xl w-full object-contain transition-shadow"
16
+ />
17
+ </Link>
18
+ <Link href={result.image.contextLink}>
19
+ <h2 className="group-hover:underline truncate text-xl">
20
+ {result.title}
21
+ </h2>
22
+ </Link>
23
+ <Link href={result.image.contextLink}>
24
+ <p className="group-hover:underline text-gray-600">
25
+ {result.displayLink}
26
+ </p>
27
+ </Link>
28
+ </div>
29
+ </div>
30
+ ))}
31
+ </div>
32
+ <div className="ml-16">
33
+ <PaginationButtons />
34
+ </div>
35
+ </div>
36
+ );
37
+ }
src/components/PaginationButtons.jsx ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import Link from "next/link";
4
+ import { usePathname, useSearchParams } from "next/navigation";
5
+
6
+ import { BsChevronLeft, BsChevronRight } from "react-icons/bs";
7
+
8
+ export default function PaginationButtons() {
9
+ const pathname = usePathname();
10
+ const searchParams = useSearchParams();
11
+ const searchTerm = searchParams.get("searchTerm");
12
+ const startIndex = +searchParams.get("start") || 1;
13
+ return (
14
+ <div className="text-blue-700 flex px-10 pb-4 justify-between sm:justify-start sm:space-x-44 sm:px-0">
15
+ {startIndex >= 10 && (
16
+ <Link
17
+ href={`${pathname}?searchTerm=${searchTerm}&start=${startIndex - 10}`}
18
+ >
19
+ <div className="flex flex-col cursor-pointer items-center hover:underline">
20
+ <BsChevronLeft className="h-5" />
21
+ <p>Previous</p>
22
+ </div>
23
+ </Link>
24
+ )}
25
+ {startIndex <= 90 && (
26
+ <Link
27
+ href={`${pathname}?searchTerm=${searchTerm}&start=${startIndex + 10}`}
28
+ >
29
+ <div className="flex flex-col cursor-pointer items-center hover:underline">
30
+ <BsChevronRight className="h-5" />
31
+ <p>Next</p>
32
+ </div>
33
+ </Link>
34
+ )}
35
+ </div>
36
+ );
37
+ }
src/components/SearchBox.jsx ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useSearchParams, useRouter } from "next/navigation";
4
+
5
+ import { RxCross2 } from "react-icons/rx";
6
+ import { BsFillMicFill } from "react-icons/bs";
7
+ import { AiOutlineSearch } from "react-icons/ai";
8
+ import { useState } from "react";
9
+
10
+ export default function SearchBox() {
11
+ const searchParams = useSearchParams();
12
+ const router = useRouter();
13
+ const searchTerm = searchParams.get("searchTerm");
14
+ const [term, setTerm] = useState(searchTerm || "");
15
+ function handleSubmit(e) {
16
+ e.preventDefault();
17
+ if (!term.trim()) return;
18
+ router.push(`/search/web?searchTerm=${term}`);
19
+ }
20
+ return (
21
+ <form
22
+ className="flex border border-gray-200 rounded-full shadow-lg px-6 py-3 ml-10 mr-5 flex-grow max-w-3xl items-center"
23
+ onSubmit={handleSubmit}
24
+ >
25
+ <input
26
+ type="text"
27
+ className="w-full focus:outline-none"
28
+ value={term}
29
+ onChange={(e) => setTerm(e.target.value)}
30
+ />
31
+ <RxCross2
32
+ className="text-2xl text-gray-500 cursor-pointer sm:mr-2"
33
+ onClick={() => setTerm("")}
34
+ />
35
+ <BsFillMicFill className="hidden sm:inline-flex text-4xl text-blue-500 pl-4 border-l-2 border-gray-300 mr-3" />
36
+ <AiOutlineSearch
37
+ className="text-2xl hidden sm:inline-flex text-blue-500 cursor-pointer"
38
+ onClick={handleSubmit}
39
+ />
40
+ </form>
41
+ );
42
+ }
src/components/SearchHeader.jsx ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Image from "next/image";
2
+ import Link from "next/link";
3
+ import SearchBox from "./SearchBox";
4
+ import { RiSettings3Line } from "react-icons/ri";
5
+ import { TbGridDots } from "react-icons/tb";
6
+ import SearchHeaderOptions from "./SearchHeaderOptions";
7
+
8
+ export default function SearchHeader() {
9
+ return (
10
+ <header className="sticky top-0 bg-white">
11
+ <div className="flex w-full p-6 items-center justify-between">
12
+ <Link href={"/"}>
13
+ <Image
14
+ width="120"
15
+ height="40"
16
+ src="https://upload.wikimedia.org/wikipedia/commons/thumb/2/2f/Google_2015_logo.svg/640px-Google_2015_logo.svg.png"
17
+ />
18
+ </Link>
19
+ <div className="flex-1">
20
+ <SearchBox />
21
+ </div>
22
+ <div className="hidden md:inline-flex space-x-2 ">
23
+ <RiSettings3Line className="header-icon" />
24
+ <TbGridDots className="header-icon" />
25
+ </div>
26
+ <button className="bg-blue-500 text-white px-6 py-2 font-medium rounded-md hover:brightness-105 hover:shadow-md transition-all ml-2">
27
+ Sign in
28
+ </button>
29
+ </div>
30
+ <SearchHeaderOptions />
31
+ </header>
32
+ );
33
+ }
src/components/SearchHeaderOptions.jsx ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client"
2
+
3
+ import { AiOutlineCamera, AiOutlineSearch } from "react-icons/ai";
4
+ import { usePathname, useRouter, useSearchParams } from "next/navigation";
5
+ export default function SearchHeaderOptions() {
6
+ const pathname = usePathname();
7
+ const router = useRouter();
8
+ const searchParams = useSearchParams();
9
+ const searchTerm = searchParams.get("searchTerm");
10
+ function selectTab(tab) {
11
+ router.push(`/search/${tab === "Images" ? "image" : "web"}?searchTerm=${searchTerm}`);
12
+ }
13
+ return (
14
+ <div className="flex space-x-2 select-none border-b w-full justify-center lg:justify-start lg:pl-52 text-gray-700 text-sm">
15
+ <div onClick={()=>selectTab("All")} className={`flex items-center space-x-1 border-b-4 border-transparent active:text-blue-500 cursor-pointer pb-3 px-2 ${pathname === "/search/web" && "!text-blue-600 !border-blue-600"}`}>
16
+ <AiOutlineSearch className="text-md"/>
17
+ <p>All</p>
18
+ </div>
19
+ <div onClick={()=>selectTab("Images")} className={`flex items-center space-x-1 border-b-4 border-transparent active:text-blue-500 cursor-pointer pb-3 px-2 ${pathname === "/search/image" && "!text-blue-600 !border-blue-600"}`}>
20
+ <AiOutlineCamera className="text-md"/>
21
+ <p>Images</p>
22
+ </div>
23
+ </div>
24
+ )
25
+ }
src/components/WebSearchResults.jsx ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Link from "next/link";
2
+ import Parser from "html-react-parser";
3
+ import PaginationButtons from "./PaginationButtons";
4
+
5
+ export default function WebSearchResults({ results }) {
6
+ return (
7
+ <div className="w-full mx-auto px-3 pb-40 sm:pb-24 sm:pl-[5%] md:pl-[14%] lg:pl-52">
8
+ <p className="text-gray-600 text-sm mb-5 mt-3">
9
+ About {results.searchInformation?.formattedTotalResults} results (
10
+ {results.searchInformation?.formattedSearchTime} seconds)
11
+ </p>
12
+ {results.items?.map((result) => (
13
+ <div className="mb-8 max-w-xl" key={result.link}>
14
+ <div className="group flex flex-col">
15
+ <Link className="text-sm truncate" href={result.link}>
16
+ {result.formattedUrl}
17
+ </Link>
18
+ <Link
19
+ className="group-hover:underline decoration-blue-800 text-xl truncate font-medium text-blue-800"
20
+ href={result.link}
21
+ >
22
+ {result.title}
23
+ </Link>
24
+ </div>
25
+ <p className="text-gray-600">{Parser(result.htmlSnippet)}</p>
26
+ </div>
27
+ ))}
28
+ <PaginationButtons />
29
+ </div>
30
+ );
31
+ }
tailwind.config.js ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /** @type {import('tailwindcss').Config} */
2
+ module.exports = {
3
+ content: [
4
+ "./app/**/*.{js,ts,jsx,tsx}",
5
+ "./pages/**/*.{js,ts,jsx,tsx}",
6
+ "./components/**/*.{js,ts,jsx,tsx}",
7
+
8
+ // Or if using `src` directory:
9
+ "./src/**/*.{js,ts,jsx,tsx}",
10
+ ],
11
+ theme: {
12
+ extend: {},
13
+ },
14
+ plugins: [],
15
+ }
utils.py DELETED
@@ -1,57 +0,0 @@
1
- import os
2
- import gradio as gr
3
- from zipfile import ZipFile
4
- from presets import *
5
-
6
def refresh_json_list(plain=False):
    """Collect the base names of every ``.json`` file under ``./index``.

    The directory tree is walked recursively; names are returned without the
    ``.json`` extension.

    Args:
        plain: When true, return the names as a plain list instead of a
            Gradio dropdown update.

    Returns:
        The list of names if ``plain`` is true, otherwise the result of
        ``gr.Dropdown.update(choices=...)`` built from that list.
    """
    json_list = [
        stem
        for _root, _dirs, files in os.walk("./index")
        for stem, ext in map(os.path.splitext, files)
        if ext == ".json"
    ]
    if plain:
        return json_list
    return gr.Dropdown.update(choices=json_list)
15
-
16
def upload_file(file_obj):
    """List the entries of an uploaded zip archive.

    Args:
        file_obj: Object whose ``name`` attribute is the path of a zip file.

    Returns:
        One ``{"name": <entry filename>}`` dict per archive entry, in the
        order reported by ``ZipFile.infolist()``.
    """
    with ZipFile(file_obj.name) as zfile:
        return [{"name": zinfo.filename} for zinfo in zfile.infolist()]
26
-
27
def reset_textbox():
    """Return a Gradio update that sets the target component's value to ''."""
    cleared = gr.update(value='')
    return cleared
29
-
30
def change_prompt_tmpl(tmpl_select):
    """Return a Gradio update carrying the prompt template for ``tmpl_select``.

    The template is looked up in ``prompt_tmpl_dict`` (presumably supplied by
    the ``presets`` star import; confirm against that module).
    """
    return gr.update(value=prompt_tmpl_dict[tmpl_select])
33
-
34
def change_refine_tmpl(refine_select):
    """Return a Gradio update carrying the refine template for ``refine_select``.

    The template is looked up in ``refine_tmpl_dict`` (presumably supplied by
    the ``presets`` star import; confirm against that module).
    """
    return gr.update(value=refine_tmpl_dict[refine_select])
37
-
38
def lock_params(index_type):
    """Enable or disable the two index-parameter sliders for ``index_type``.

    The first slider (child-node count) is interactive only for
    ``"GPTTreeIndex"``; the second (keywords per chunk) only for
    ``"GPTKeywordTableIndex"``. A disabled slider's label carries an
    "unavailable for the current index type" suffix.

    Returns:
        A 2-tuple of ``gr.Slider.update(...)`` results (child-node slider,
        keyword slider), or ``None`` when ``index_type`` is not one of the
        four recognised names.
    """
    child_on = index_type == "GPTTreeIndex"
    keyword_on = index_type == "GPTKeywordTableIndex"
    both_off = index_type == "GPTVectorStoreIndex" or index_type == "GPTListIndex"

    # Unrecognised index types yield None.
    if not (child_on or keyword_on or both_off):
        return None

    child_label = "子节点数量" if child_on else "子节点数量(当前索引类型不可用)"
    keyword_label = "每段关键词数量" if keyword_on else "每段关键词数量(当前索引类型不可用)"
    return (
        gr.Slider.update(interactive=child_on, label=child_label),
        gr.Slider.update(interactive=keyword_on, label=keyword_label),
    )
45
-
46
def add_space(text):
    """Insert a space after each listed punctuation mark in ``text``.

    Args:
        text: The string to process.

    Returns:
        A copy of ``text`` in which every mark in the table below is followed
        by a space. A space is added even if one is already present.
    """
    # One translate() pass replaces the chain of str.replace() calls; the
    # result is the same because no replacement contains another key.
    spaced = str.maketrans(
        {',': ', ', '。': '。 ', '?': '? ', '!': '! ', ':': ': ', ';': '; '}
    )
    return text.translate(spaced)
51
-
52
- ## create a test for parse_text
53
def parse_text(text):
    """Return ``text`` with every newline character removed.

    The remaining lines are concatenated with no separator.
    """
    return text.replace("\n", "")