Spaces:

tarunkumark2
/

salesagent

Sleeping

App Files Files Community

tarunkumark2 committed on Oct 23, 2024

Commit

7794643

1 Parent(s): 8596318

init

Browse files

Files changed (4) hide show

Dockerfile +44 -0
requirements-local.txt +4 -0
run_model.py +64 -0
templates/index.html +231 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,44 @@

+FROM python:3.9-alpine
+# Install the build toolchain (build-base, cmake, clang) and git
+RUN apk add --no-cache build-base cmake clang git && \
+    rm -rf /var/cache/apk/*
+# Shallow-clone the BitNet repository (with submodules) and drop its git metadata
+RUN git clone --recursive --depth 1 https://github.com/microsoft/BitNet.git && \
+    rm -rf BitNet/.git
+# All following instructions run inside the cloned repository
+WORKDIR /BitNet
+# Install BitNet's own Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy this project's extra requirements (web server packages) into the image
+COPY requirements-local.txt .
+# Install the extra dependencies listed in requirements-local.txt
+RUN pip install --no-cache-dir -r requirements-local.txt
+# Run BitNet's TL2 code generation for the Llama3-8B-1.58-100B-tokens model
+RUN python3 utils/codegen_tl2.py --model Llama3-8B-1.58-100B-tokens --BM 256,128,256,128 --BK 96,96,96,96 --bm 32,32,32,32
+# Configure and build with cmake, using clang/clang++ and the x86 TL2 option enabled
+RUN cmake -B build -DBITNET_X86_TL2=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+RUN cmake --build build --config Release
+# Download the GGUF model file from Hugging Face into /BitNet
+ADD https://huggingface.co/brunopio/Llama3-8B-1.58-100B-tokens-GGUF/resolve/main/Llama3-8B-1.58-100B-tokens-TQ2_0.gguf .
+# Verify the downloaded model against its expected SHA-256 digest; the build fails on mismatch
+RUN echo "2565559c82a1d03ecd1101f536c5e99418d07e55a88bd5e391ed734f6b3989ac Llama3-8B-1.58-100B-tokens-TQ2_0.gguf" | sha256sum -c
+# Document the port the web server in run_model.py listens on
+EXPOSE 7860
+# Copy the build context (run_model.py, templates/, ...) into /BitNet
+COPY . .
+# COPY templates/* .
+# Start the Flask/Socket.IO server, which launches run_inference.py for each query
+CMD ["python3", "run_model.py"]

requirements-local.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+flask
+flask-socketio
+requests
+eventlet

run_model.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from flask import Flask, render_template
+from flask_socketio import SocketIO, emit
+import subprocess
+import threading
+app = Flask(__name__)
+# cors_allowed_origins='*' accepts Socket.IO connections from any origin
+socketio = SocketIO(app, cors_allowed_origins='*')
+# Event checked by stream_process_output between output lines; when set, streaming stops
+stop_event = threading.Event()
+# Most recently started inference subprocess (None until the first query runs)
+current_process = None
+def stream_process_output(command):
+    """Execute a command and emit stdout line by line, with thread control."""
+    global current_process
+    # Start the subprocess
+    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
+    current_process = process  # Store the reference to the current process
+    for stdout_line in process.stdout:
+        if stop_event.is_set():  # Stop if the event is triggered
+            break
+        socketio.emit('response', {'word': stdout_line})
+        socketio.sleep(0.1)  # Yield to allow other threads to run
+    process.stdout.close()
+    process.wait()
+@socketio.on('query')
+def start_stream(data=None):
+    """Start the process and stream its stdout to the client, ensuring thread control."""
+    global stop_event, current_process
+    if data is None:
+        return
+    query = data['query']
+    command = ['python3', 'run_inference.py', '-m', 'Llama3-8B-1.58-100B-tokens-TQ2_0.gguf', '-p', query]
+    if 'args' in data and data.get('args'):
+        additional_args = data['args'].strip().split()
+        command.extend(additional_args)
+    print(f"command- {command}")
+    # If there is an existing running task, terminate it
+    if current_process and current_process.poll() is None:
+        current_process.terminate()
+    stop_event.set()  # Signal the current thread to stop
+    stop_event.clear()  # Reset the stop event for the new thread
+    # Start a new background task
+    socketio.start_background_task(target=stream_process_output, command=command)
+@app.route('/')
+def index():
+    return render_template('index.html')
+if __name__ == '__main__':
+    # Listen on all interfaces on port 7860, the port the Dockerfile EXPOSEs
+    socketio.run(app, host='0.0.0.0', port=7860)

templates/index.html ADDED Viewed

	@@ -0,0 +1,231 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>BitNet 1-Bit LLM Query Interface</title>
+  <style>
+    body {
+      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+      margin: 0;
+      padding: 0;
+      background-color: #f4f4f9;
+      display: flex;
+      justify-content: center;
+      align-items: center;
+      height: 100vh;
+    }
+    .container {
+      display: flex;
+      max-width: 1200px;
+      width: 100%;
+      background-color: #fff;
+      border-radius: 8px;
+      box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+      overflow: hidden;
+    }
+    .left-section {
+      flex: 1;
+      padding: 20px;
+      background-color: white;
+      color: black;
+      display: flex;
+      flex-direction: column;
+      justify-content: space-between;
+      box-sizing: border-box;
+    }
+    .left-section h2 {
+      margin-bottom: 20px;
+      font-size: 1.5rem;
+    }
+    .input-group {
+      margin-bottom: 15px;
+    }
+    .input-group label {
+      display: block;
+      font-size: 0.9rem;
+      margin-bottom: 8px;
+    }
+    .input-group input,
+    .input-group textarea {
+      width: 100%;
+      padding: 10px;
+      border-radius: 4px;
+      font-size: 0.95rem;
+      box-sizing: border-box;
+    }
+    input[type="number"],
+    textarea {
+      width: 100%;
+      border: 1px solid #ccc;
+      border-radius: 4px;
+      box-sizing: border-box;
+    }
+    input[type="number"]:focus,
+    textarea:focus {
+      border-color: #238a95;
+      outline: none;
+      box-shadow: 0 0 5px rgba(35, 138, 149, 0.5);
+    }
+    input[type="number"]::placeholder,
+    textarea::placeholder {
+      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    }
+    .input-group textarea {
+      resize: none;
+    }
+    .button-group {
+      /* NOTE(review): the `margin` shorthand below resets all four sides, so this margin-top has no effect */
+      margin-top: 20px;
+      margin: auto;
+    }
+    .button-group button {
+      padding: 10px 20px;
+      background-color: #238a95;
+      border: none;
+      color: white;
+      cursor: pointer;
+      border-radius: 4px;
+      font-size: 1rem;
+      transition: background-color 0.3s, transform 0.2s;
+      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    }
+    .button-group button:hover {
+      background-color: #1e7a84;
+      transform: scale(1.05);
+    }
+    .right-section {
+      flex: 2;
+      padding: 20px;
+      background-color: #ffffff;
+      border-left: 2px solid #dbdbdb;
+      box-sizing: border-box;
+    }
+    .right-section h2 {
+      margin-bottom: 20px;
+    }
+    #response {
+      height: 430px;
+      border: 1px solid #dbdbdb;
+      border-radius: 4px;
+      padding: 10px;
+      overflow-y: auto;
+      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+      background-color: #f5f4f459;
+      font-size: 0.95rem;
+    }
+    /* Responsive design */
+    @media screen and (max-width: 768px) {
+      .container {
+        flex-direction: column;
+      }
+      .right-section {
+        border-left: none;
+        border-top: 2px solid #dbdbdb;
+      }
+    }
+  </style>
+</head>
+<body>
+  <div class="container">
+    <div class="left-section">
+      <h2>Command Options</h2>
+      <div class="input-group">
+        <label for="tokens">Number of tokens to predict</label>
+        <input type="number" id="tokens" min="0" placeholder="Enter number of tokens">
+      </div>
+      <div class="input-group">
+        <label for="threads">Number of threads to use</label>
+        <input type="number" id="threads" min="0" placeholder="Enter number of threads">
+      </div>
+      <div class="input-group">
+        <label for="context-size">Size of the prompt context</label>
+        <input type="number" id="context-size" min="0" placeholder="Enter context size">
+      </div>
+      <div class="input-group">
+        <label for="temperature">Temperature, a hyperparameter that controls the randomness of the generated text</label>
+        <!-- step="any" allows fractional values such as 0.7; the default step of 1 would mark them invalid -->
+        <input type="number" min="0" step="any" id="temperature" placeholder="Enter temperature value">
+      </div>
+      <div class="input-group">
+        <label for="prompt">Prompt</label>
+        <!-- `min` is not a valid <textarea> attribute, so it has been removed -->
+        <textarea id="prompt" rows="4" placeholder="Enter your prompt"></textarea>
+      </div>
+      <div class="button-group">
+        <button onclick="sendQuery()">Send Query</button>
+      </div>
+    </div>
+    <div class="right-section">
+      <h2>Response</h2>
+      <div id="response"></div>
+    </div>
+  </div>
+  <script type="module">
+    import { io } from "https://cdn.socket.io/4.8.0/socket.io.esm.min.js";
+    const socket = io();
+    window.sendQuery = function() {
+      const tokens = document.getElementById('tokens').value;
+      const threads = document.getElementById('threads').value;
+      const contextSize = document.getElementById('context-size').value;
+      const temperature = document.getElementById('temperature').value;
+      const prompt = document.getElementById('prompt').value;
+      if (!prompt) {
+        return alert('There is no prompt to send!');
+      }
+      let args = '';
+      if (tokens && !isNaN(tokens) && tokens > -1) {
+        args += ` -n ${tokens}`;
+      }
+      if (threads && !isNaN(threads) && threads > -1) {
+        args += ` -t ${threads}`;
+      }
+      if (contextSize && !isNaN(contextSize) && contextSize > -1) {
+        args += ` -c ${contextSize}`;
+      }
+      if (temperature && !isNaN(temperature) && temperature > -1) {
+        args += ` -temp ${temperature}`;
+      }
+      // Clear previous response
+      document.getElementById('response').innerText = '';
+      // Emit query with all parameters
+      socket.emit('query', { query: prompt, args });
+    }
+    socket.on('response', function (word) {
+      const responseDiv = document.getElementById('response');
+      responseDiv.innerText += word.word + ' ';
+      // Scroll to the bottom of the response div
+      responseDiv.scrollTop = responseDiv.scrollHeight;
+    });
+  </script>
+</body>
+</html>