# Dataset used for training

We used [NuclearAi/Nuke-Python-Verse](https://huggingface.co/datasets/NuclearAi/Nuke-Python-Verse) to fine-tune the *Qwen2-1.5B-Instruct* model on **240,888** unique lines of Python code scraped from publicly available datasets.
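If you want to inspect that corpus yourself, it can be loaded with the `datasets` library. This is a minimal sketch; it assumes the dataset publishes a `train` split, and the column layout is whatever `print(ds)` reports.

```python
from datasets import load_dataset

# Load the fine-tuning corpus (assumes a "train" split is available)
ds = load_dataset("NuclearAi/Nuke-Python-Verse", split="train")

print(ds)     # row count and column names
print(ds[0])  # first record, to see the layout of an entry
```

The example below loads the fine-tuned model in 4-bit precision and runs a simple streaming chat loop in the terminal. It needs `transformers`, `torch`, `bitsandbytes`, and `accelerate` installed.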
```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"  # Use the GPU if available, otherwise fall back to the CPU

# Configure 4-bit quantization using bitsandbytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                    # Enable 4-bit quantization
    bnb_4bit_use_double_quant=True,       # Use double quantization
    bnb_4bit_compute_dtype=torch.float16  # Compute in float16 for improved performance
)

# Load the model with the specified configuration
model = AutoModelForCausalLM.from_pretrained(
    "NuclearAi/Hyper-X-Qwen2-1.5B-It-Python",
    quantization_config=bnb_config,                   # Apply the 4-bit quantization configuration
    torch_dtype="auto",                               # Automatic selection of data type
    device_map="auto" if device == "cuda" else None   # Place the model on the GPU when available, else CPU
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("NuclearAi/Hyper-X-Qwen2-1.5B-It-Python")

# Initialize a text streamer so tokens are printed as they are generated
streamer = TextStreamer(tokenizer, skip_prompt=True)  # skip_prompt avoids echoing the user's input

# Generate a response from the model based on the user's input
def generate_response(user_input):
    # Tokenize the user input
    input_ids = tokenizer.encode(user_input, return_tensors="pt").to(device)

    # Generate the model's response with streaming enabled
    generated_ids = model.generate(
        input_ids,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,  # Handle padding for generation
        streamer=streamer                     # Stream tokens to stdout in real time
    )

    # Decode the full response from token IDs to text
    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return response.strip()

# Start the conversation loop
print("You can start chatting with the model. Type 'exit' to stop the conversation.")
while True:
    # Get the user's input
    user_input = input("You: ")

    # Check whether the user wants to exit the conversation
    if user_input.lower() in ["exit", "quit", "stop"]:
        print("Ending the conversation. Goodbye!")
        break

    # Generate the model's response
    print("Assistant: ", end="", flush=True)
    response = generate_response(user_input)

    # The TextStreamer already prints the response token by token, so just move to the next line
    print()
```
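Because the base model is instruction-tuned, wrapping the prompt in the tokenizer's chat template may give more conversational replies than feeding raw text; whether that still holds after this Python-focused fine-tune is worth verifying on your own prompts. A sketch of an alternative `generate_response` under that assumption:

```python
def generate_response(user_input):
    # Wrap the user turn in the chat template (assumes the Qwen2 template survived fine-tuning)
    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant prefix so the model starts its reply
        return_tensors="pt"
    ).to(device)

    # Generate with the same streaming setup as above
    generated_ids = model.generate(
        input_ids,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    # Decode only the newly generated tokens, skipping the prompt portion
    response = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response.strip()
```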