cella110n committed on
Commit
354fdd6
1 Parent(s): 2e703bc

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +115 -1
README.md CHANGED
@@ -5,4 +5,118 @@ license: apache-2.0
5
  Finetuned from p1atdev/siglip-tagger-test-3
6
  https://huggingface.co/p1atdev/siglip-tagger-test-3
7
 
8
- test work
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  Finetuned from p1atdev/siglip-tagger-test-3
6
  https://huggingface.co/p1atdev/siglip-tagger-test-3
7
 
8
+ test work
9
+
10
+ Usage:
11
+ ```python
12
+ import torch
13
+ import torch.nn as nn
14
+ import numpy as np
15
+ from dataclasses import dataclass
16
+ from transformers import SiglipVisionModel, SiglipPreTrainedModel, SiglipVisionConfig, AutoImageProcessor
17
+ from transformers.utils import ModelOutput
18
+
19
+ @dataclass
20
+ class SiglipForImageClassifierOutput(ModelOutput):
21
+ loss: torch.FloatTensor | None = None
22
+ logits: torch.FloatTensor | None = None
23
+ pooler_output: torch.FloatTensor | None = None
24
+ hidden_states: tuple[torch.FloatTensor, ...] | None = None
25
+ attentions: tuple[torch.FloatTensor, ...] | None = None
26
+
27
+ class SiglipForImageClassification(SiglipPreTrainedModel):
28
+ config_class = SiglipVisionConfig
29
+ main_input_name = "pixel_values"
30
+
31
+ def __init__(
32
+ self,
33
+ config,
34
+ ):
35
+ super().__init__(config)
36
+
37
+ # self.num_labels = config.num_labels
38
+ self.siglip = SiglipVisionModel(config)
39
+
40
+ # Classifier head
41
+ self.classifier = (
42
+ nn.Linear(config.hidden_size, config.num_labels)
43
+ if config.num_labels > 0
44
+ else nn.Identity()
45
+ )
46
+
47
+ # Initialize weights and apply final processing
48
+ self.post_init()
49
+
50
+ def forward(
51
+ self, pixel_values: torch.FloatTensor, labels: torch.LongTensor | None = None
52
+ ):
53
+ outputs = self.siglip(pixel_values)
54
+ pooler_output = outputs.pooler_output
55
+ logits = self.classifier(pooler_output)
56
+
57
+ loss = None
58
+ if labels is not None:
59
+ loss_fct = nn.BCEWithLogitsLoss()
60
+ loss = loss_fct(logits, labels)
61
+
62
+ return SiglipForImageClassifierOutput(
63
+ loss=loss,
64
+ logits=logits,
65
+ pooler_output=outputs.pooler_output,
66
+ hidden_states=outputs.hidden_states,
67
+ attentions=outputs.attentions,
68
+ )
69
+
70
+ # モデル設定のロード
71
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
72
+
73
+ config = SiglipVisionConfig.from_pretrained('cella110n/siglip-tagger-FT3ep')
74
+ processor = AutoImageProcessor.from_pretrained("cella110n/siglip-tagger-FT3ep", config=config)
75
+ model = SiglipForImageClassification.from_pretrained('cella110n/siglip-tagger-FT3ep', torch_dtype=torch.bfloat16).to(device)
76
+
77
+ model.eval()
78
+ print("Model Loaded. device:", model.device)
79
+
80
+ from PIL import Image
+ import matplotlib.pyplot as plt
81
+
82
+ # 入力画像サイズの確認と調整
83
+ img_path = "path/to/image"
84
+ img = Image.open(img_path)
85
+
86
+ inputs = processor(images=img, return_tensors="pt") # 画像をモデルに適した形式に変換
87
+ print("Image processed.")
88
+
89
+ # inputs.pixel_valuesの画像を表示
90
+ img = inputs.pixel_values[0].permute(1, 2, 0).cpu().numpy()
91
+ plt.imshow(img)
92
+ plt.axis('off')
93
+ plt.show()
94
+
95
+ # # モデルの予測実行
96
+ with torch.no_grad():
97
+ logits = (model(
98
+ **inputs.to(
99
+ model.device,
100
+ model.dtype
101
+ )
102
+ )
103
+ .logits.detach()
104
+ .cpu()
105
+ .float()
106
+ )
107
+
108
+ logits = np.clip(logits, 0.0, 1.0) # オーバーフローを防ぐためにlogitsをクリップ
109
+
110
+ prob_cutoff = 0.3 # この確率以上のクラスのみを表示
111
+
112
+ result = {}
113
+
114
+ for prediction in logits:
115
+ for i, prob in enumerate(prediction):
116
+ if prob.item() > prob_cutoff:
117
+ result[model.config.id2label[i]] = prob.item()
118
+
119
+ # resultを、高いほうから表示
120
+ sorted_result = sorted(result.items(), key=lambda x: x[1], reverse=True)
121
+ sorted_result
122
+ ```