Nuclear Ai committed on
Commit
e2252a5
1 Parent(s): 19b7ff6

Update README.md

Files changed (1): README.md +66 -1
README.md CHANGED
@@ -25,4 +25,69 @@ pipeline_tag: text-generation

# Dataset used for training

We used [NuclearAi/Nuke-Python-Verse](https://huggingface.co/datasets/NuclearAi/Nuke-Python-Verse) to fine-tune the *Qwen2-1.5B-Instruct* model on a large dataset of **240,888** unique lines of Python code scraped from publicly available datasets.
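For reference, the training corpus can be inspected directly with the `datasets` library. This is a minimal sketch, assuming the dataset exposes a default `train` split on the Hub:

```python
from datasets import load_dataset

# Load the public Python-code corpus used for fine-tuning
# (the "train" split name is an assumption; check the dataset card).
dataset = load_dataset("NuclearAi/Nuke-Python-Verse", split="train")

print(dataset)     # Number of rows and column names
print(dataset[0])  # First example from the corpus
```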
Below is an example of chatting with the model, loading it in 4-bit with `bitsandbytes`:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"  # Use GPU if available, else fall back to CPU

# Configure 4-bit quantization using bitsandbytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                    # Enable 4-bit quantization
    bnb_4bit_use_double_quant=True,       # Use double quantization
    bnb_4bit_compute_dtype=torch.float16  # Compute in float16 for improved performance
)

# Load the model with the specified configuration
model = AutoModelForCausalLM.from_pretrained(
    "NuclearAi/Hyper-X-Qwen2-1.5B-It-Python",
    quantization_config=bnb_config,                  # Apply the 4-bit quantization configuration
    torch_dtype="auto",                              # Automatic selection of data type
    device_map="auto" if device == "cuda" else None  # Place the model on GPU automatically, or fall back to CPU
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("NuclearAi/Hyper-X-Qwen2-1.5B-It-Python")

# Initialize a text streamer for streaming the output
# (skip_prompt=True avoids re-printing the user's input)
streamer = TextStreamer(tokenizer, skip_prompt=True)

# Generate a response from the model based on the user's input
def generate_response(user_input):
    # Tokenize the user input
    input_ids = tokenizer.encode(user_input, return_tensors="pt").to(device)

    # Generate the model's response with streaming enabled
    generated_ids = model.generate(
        input_ids,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,  # Handle padding for generation
        streamer=streamer                     # Stream tokens to stdout as they are generated
    )

    # Decode the response from token IDs to text
    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return response.strip()

# Start the conversation loop
print("You can start chatting with the model. Type 'exit' to stop the conversation.")
while True:
    # Get the user's input
    user_input = input("You: ")

    # Check if the user wants to exit the conversation
    if user_input.lower() in ["exit", "quit", "stop"]:
        print("Ending the conversation. Goodbye!")
        break

    # Generate the model's response
    print("Assistant: ", end="", flush=True)  # Prefix for the streamed response
    response = generate_response(user_input)

    # The TextStreamer already prints the response token by token, so just move to the next line
    print()
```
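Since Hyper-X-Qwen2-1.5B-It-Python is based on an instruct-tuned Qwen2 model, prompts can also be wrapped in the tokenizer's chat template before generation. A minimal sketch, reusing the `model`, `tokenizer`, `streamer`, and `device` defined above:

```python
# Format a single-turn prompt with the chat template
# (assumes the tokenizer ships one, as Qwen2-Instruct tokenizers do).
messages = [{"role": "user", "content": "Write a Python function that reverses a string."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
model.generate(input_ids, max_new_tokens=128, streamer=streamer)
```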