HODACHI committed
Commit 23afa1d · verified · 1 Parent(s): fa90ebf

Update README.md

Files changed (1)
  1. README.md +10 -7
README.md CHANGED
@@ -99,6 +99,9 @@ model = AutoModel.from_pretrained(
  
  The reason for writing the code this way is to avoid errors that occur during multi-GPU inference due to tensors not being on the same device. By ensuring that the first and last layers of the large language model (LLM) are on the same device, we prevent such errors.
  
+ このようにコードを書く理由は、テンソルが同じデバイス上にないためにマルチGPU推論中に発生するエラーを避けるためです。
+ ラージ・ランゲージ・モデル(LLM)の最初のレイヤーと最後のレイヤーが同じデバイス上にあるようにすることで、このようなエラーを防ぐことができます。
+ 
  ```python
  import math
  import torch
@@ -239,7 +242,7 @@ tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast
  pixel_values = load_image('./examples/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
  generation_config = dict(max_new_tokens=1024, do_sample=False)
  
- # pure-text conversation (纯文本对话)
+ # pure-text conversation (テキストのみの対話)
  question = 'Hello, who are you?'
  response, history = model.chat(tokenizer, None, question, generation_config, history=None, return_history=True)
  print(f'User: {question}\nAssistant: {response}')
@@ -248,12 +251,12 @@ question = 'Can you tell me a story?'
  response, history = model.chat(tokenizer, None, question, generation_config, history=history, return_history=True)
  print(f'User: {question}\nAssistant: {response}')
  
- # single-image single-round conversation (单图单轮对话)
+ # single-image single-round conversation (単一画像、単一ラウンド対話)
  question = '<image>\nPlease describe the image shortly.'
  response = model.chat(tokenizer, pixel_values, question, generation_config)
  print(f'User: {question}\nAssistant: {response}')
  
- # single-image multi-round conversation (单图多轮对话)
+ # single-image multi-round conversation (単一画像、多ラウンド対話)
  question = '<image>\nPlease describe the image in detail.'
  response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
  print(f'User: {question}\nAssistant: {response}')
@@ -262,7 +265,7 @@ question = 'Please write a poem according to the image.'
  response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=history, return_history=True)
  print(f'User: {question}\nAssistant: {response}')
  
- # multi-image multi-round conversation, combined images (多图多轮对话,拼接图像)
+ # multi-image multi-round conversation, combined images (複数画像、複数ラウンド対話、画像のステッチング)
  pixel_values1 = load_image('./examples/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
  pixel_values2 = load_image('./examples/image2.jpg', max_num=12).to(torch.bfloat16).cuda()
  pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
@@ -277,7 +280,7 @@ response, history = model.chat(tokenizer, pixel_values, question, generation_con
  history=history, return_history=True)
  print(f'User: {question}\nAssistant: {response}')
  
- # multi-image multi-round conversation, separate images (多图多轮对话,独立图像)
+ # multi-image multi-round conversation, separate images (別々の画像による多画像多ラウンド対話)
  pixel_values1 = load_image('./examples/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
  pixel_values2 = load_image('./examples/image2.jpg', max_num=12).to(torch.bfloat16).cuda()
  pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
@@ -295,7 +298,7 @@ response, history = model.chat(tokenizer, pixel_values, question, generation_con
  history=history, return_history=True)
  print(f'User: {question}\nAssistant: {response}')
  
- # batch inference, single image per sample (单图批处理)
+ # batch inference, single image per sample (単一画像バッチ処理)
  pixel_values1 = load_image('./examples/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
  pixel_values2 = load_image('./examples/image2.jpg', max_num=12).to(torch.bfloat16).cuda()
  num_patches_list = [pixel_values1.size(0), pixel_values2.size(0)]
@@ -309,7 +312,7 @@ responses = model.batch_chat(tokenizer, pixel_values,
  for question, response in zip(questions, responses):
      print(f'User: {question}\nAssistant: {response}')
  
- # video multi-round conversation (视频多轮对话)
+ # video multi-round conversation (ビデオ・マルチラウンド・ダイアログ)
  def get_index(bound, fps, max_frame, first_idx=0, num_segments=32):
      if bound:
          start, end = bound[0], bound[1]
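
The paragraph changed in the first hunk describes the device-placement idea behind the README's multi-GPU setup. As a rough, illustrative sketch of that idea (not taken from this commit), the snippet below builds a `device_map` that spreads the LLM's decoder layers across the available GPUs while pinning the vision tower and the first and last decoder layers to GPU 0, so the tensors entering and leaving the language model stay on one device. The module names (`vision_model`, `mlp1`, `language_model.model.layers.*`) and the layer count are assumptions based on an InternVL-style checkpoint layout; adjust them to the actual model.

```python
import math
import torch

def split_model(num_layers: int) -> dict:
    """Assign decoder layers to GPUs, keeping the vision tower and the
    first and last decoder layers on GPU 0 (hypothetical module names)."""
    world_size = max(torch.cuda.device_count(), 1)
    device_map = {}
    if world_size == 1:
        shares = [num_layers]
    else:
        # GPU 0 also hosts the vision encoder, so give it roughly half a share.
        per_gpu = math.ceil(num_layers / (world_size - 0.5))
        shares = [per_gpu] * world_size
        shares[0] = math.ceil(per_gpu * 0.5)
    layer = 0
    for gpu, share in enumerate(shares):
        for _ in range(share):
            if layer >= num_layers:
                break
            device_map[f'language_model.model.layers.{layer}'] = gpu
            layer += 1
    # Keep the non-decoder pieces (vision tower, projector, embeddings,
    # final norm, LM head) together on GPU 0.
    for name in ('vision_model', 'mlp1',
                 'language_model.model.embed_tokens',
                 'language_model.model.norm',
                 'language_model.lm_head'):
        device_map[name] = 0
    # The point made in the README paragraph: the first and last decoder
    # layers share a device, so the LLM's inputs and outputs live on GPU 0.
    device_map['language_model.model.layers.0'] = 0
    device_map[f'language_model.model.layers.{num_layers - 1}'] = 0
    return device_map

# Hypothetical usage; the path and layer count are placeholders:
# model = AutoModel.from_pretrained(path, torch_dtype=torch.bfloat16,
#                                   trust_remote_code=True, low_cpu_mem_usage=True,
#                                   device_map=split_model(num_layers=48)).eval()
```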