adding the exact data used to train this model
Browse files
- .gitattributes +6 -0
- data/airoboros_3.2_without_contextual_slimorca_orca_sharegpt.json +3 -0
- data/capybara_sharegpt.json +3 -0
- data/cot_alpaca_gpt4_extracted_openhermes_2.5_sharegpt.json +3 -0
- data/merged_all.json +3 -0
- data/remove_empty_output.py +13 -0
- data/slimorca_dedup_filtered_95k_sharegpt.json +3 -0
- data/synthia-v1.3_sharegpt_12500.json +3 -0
.gitattributes
CHANGED
@@ -34,3 +34,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
einstein-v4-7b.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
einstein-v4-7b.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data/airoboros_3.2_without_contextual_slimorca_orca_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
38 |
+
data/capybara_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
39 |
+
data/cot_alpaca_gpt4_extracted_openhermes_2.5_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
40 |
+
data/merged_all.json filter=lfs diff=lfs merge=lfs -text
|
41 |
+
data/slimorca_dedup_filtered_95k_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
42 |
+
data/synthia-v1.3_sharegpt_12500.json filter=lfs diff=lfs merge=lfs -text
|
data/airoboros_3.2_without_contextual_slimorca_orca_sharegpt.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22b3140cce72bfaad2ae423c2c9bafd9ce128cf7820e8be3b9f6d415390c5689
|
3 |
+
size 89066312
|
data/capybara_sharegpt.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1947d28999416a2f468d1e09654cfdfc9bab8ccd03aa184598d20f0000dd6e4
|
3 |
+
size 76361785
|
data/cot_alpaca_gpt4_extracted_openhermes_2.5_sharegpt.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a037af5bf62d30414b85d036c09c0f860922f66c3e7fd701abf809f7fc94c32
|
3 |
+
size 40074062
|
data/merged_all.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85e34219effaa00e2962d6acff3697a21e5ef86fc7b899e5732d5359d8866b26
|
3 |
+
size 582406346
|
data/remove_empty_output.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Drop every row whose "output" field is an empty string from merged_all.json.

Run from the directory that contains ``merged_all.json``. The file is
rewritten in place (via a temporary sibling file), and the row counts before
and after filtering are printed.
"""
import json
import os

# File that is both read and rewritten; resolved relative to the working dir.
DATA_PATH = 'merged_all.json'


def remove_empty_output(rows):
    """Return a new list with the rows whose ``"output"`` is not ``""``.

    Args:
        rows: Iterable of mappings, each of which must have an ``"output"`` key.

    Returns:
        A list of the rows, in their original order, whose ``"output"`` value
        differs from the empty string. Whitespace-only values are kept.

    Raises:
        KeyError: If a row has no ``"output"`` key.
    """
    return [row for row in rows if row["output"] != ""]


def main(path=DATA_PATH):
    """Filter the JSON list stored at *path* and rewrite the file in place.

    Args:
        path: JSON file holding a list of rows; defaults to ``merged_all.json``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    print(f"Normal len: {len(data)}")

    data = remove_empty_output(data)

    print(f"After len: {len(data)}")

    # Opening the target with 'w' would truncate it before the dump finishes,
    # so a failure mid-write would destroy the only copy. Write next to the
    # target and swap the finished file in with a single rename instead.
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=1)
        os.replace(tmp_path, path)
    except BaseException:
        # Leave the original untouched and do not leave a partial file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


if __name__ == "__main__":
    main()
|
data/slimorca_dedup_filtered_95k_sharegpt.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:302e8d78b1f5f08bb7dd0ab7ded0204935003aea0b4c5bdbd8821d8924ab15f8
|
3 |
+
size 227955996
|
data/synthia-v1.3_sharegpt_12500.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbdbc7413a3c7fc65a900518f0db8627bb5ced53e1e8ee82613d09856c1b3b70
|
3 |
+
size 30638009
|