JaphetHernandez committed
Commit 7c93cc3 (1 parent: 8ecc356)

Update app.py

Files changed (1): app.py (+14, -10)
app.py CHANGED
@@ -28,14 +28,11 @@ model_id = "fireworks-ai/firefunction-v2"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",  # Allow automatic offload between CPU and GPU
+    device_map="auto",  # Allow automatic offloading between CPU and GPU
     torch_dtype=torch.float16,
     quantization_config=quant_config
 )
 
-# Make sure the model is on the GPU
-model.to("cuda")
-
 # Set the padding token
 if tokenizer.pad_token_id is None:
     tokenizer.pad_token_id = tokenizer.eos_token_id
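
Context for this hunk: with device_map="auto", Accelerate dispatches the quantized weights across the available devices at load time, so the explicit model.to("cuda") is unnecessary; calling .to() on a bitsandbytes-quantized model is in fact rejected by recent transformers releases. Below is a minimal sketch of the resulting loading code. The BitsAndBytesConfig shown is an assumption for illustration, since the commit does not include the definition of quant_config.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "fireworks-ai/firefunction-v2"

# Assumption: app.py defines quant_config earlier; a 4-bit config is a
# plausible stand-in, not the commit's actual configuration.
quant_config = BitsAndBytesConfig(load_in_4bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",            # Accelerate places each layer on GPU or CPU
    torch_dtype=torch.float16,
    quantization_config=quant_config,
)
# No model.to("cuda") here: the weights are already dispatched, and .to()
# raises an error for bitsandbytes-quantized models.

# Set the padding token
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id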
@@ -68,12 +65,19 @@ if uploaded_file is not None:
     batch_size = 4  # Reduce the batch size to minimize memory usage
     job_titles_batches = [job_titles[i:i+batch_size] for i in range(0, len(job_titles), batch_size)]
 
-    # Define the prompt for Fireworks
+    # Define the prompt for Fireworks in a function-call format
     prompt_template = PromptTemplate(
         template=(
-            "Calculate the cosine similarity between the query: '{query}' "
-            "and the list of job titles: {job_titles}. "
-            "Return the results as 'Job Title: [Job Title], Score: [Cosine Similarity Score]'."
+            "Function: calculate_cosine_similarity\n"
+            "Description: Calculate the cosine similarity between the given query and job titles.\n"
+            "Parameters:\n"
+            "  - query: The query string to compare.\n"
+            "  - job_titles: A list of job titles to compare against.\n"
+            "Input:\n"
+            "  query: '{query}'\n"
+            "  job_titles: {job_titles}\n"
+            "Output:\n"
+            "  Return the results as 'Job Title: [Job Title], Score: [Cosine Similarity Score]'."
         ),
         input_variables=["query", "job_titles"]
     )
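
The rewritten template mimics FireFunction's function-calling style inside a plain LangChain PromptTemplate. A small usage sketch follows, with the template abridged and the query and job titles filled with illustrative values; in app.py they come from the uploaded file.

from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate(
    template=(
        "Function: calculate_cosine_similarity\n"
        "Input:\n"
        "  query: '{query}'\n"
        "  job_titles: {job_titles}\n"
        "Output:\n"
        "  Return the results as 'Job Title: [Job Title], Score: [Cosine Similarity Score]'."
    ),
    input_variables=["query", "job_titles"],
)

# Illustrative values only; app.py reads these from the uploaded file.
prompt = prompt_template.format(
    query="Data Scientist",
    job_titles=["ML Engineer", "Data Analyst", "Backend Developer"],
)
print(prompt)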
@@ -90,13 +94,13 @@ if uploaded_file is not None:
     all_scores = []
     try:
         for batch in job_titles_batches:
-            # Tokenize the input and move it to CUDA
+            # Tokenize the input and move the tensors to CUDA
             model_inputs = tokenizer(
                 batch,
                 return_tensors="pt",
                 padding=True,
                 truncation=True
-            ).to("cuda")  # Move explicitly to CUDA
+            ).to("cuda")  # Move only the input tensors to CUDA
 
             with torch.cuda.amp.autocast():  # Use mixed precision
                 model_inputs['attention_mask'] = (model_inputs['input_ids'] != tokenizer.pad_token_id).int().to("cuda")
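
Note that the tokenizer call already returns an attention_mask, so the manual recomputation inside the autocast block is redundant, though harmless. For context, here is a sketch of one full batch iteration under these changes; the generate call and its arguments are assumptions, since the commit does not show what runs after tokenization.

all_scores = []
for batch in job_titles_batches:
    # Tokenize the batch; only these input tensors move to CUDA, while the
    # model stays wherever device_map="auto" placed it.
    model_inputs = tokenizer(
        batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to("cuda")

    # Mixed precision keeps activations in float16 where safe.
    with torch.cuda.amp.autocast():
        # max_new_tokens is an illustrative choice, not from the commit.
        outputs = model.generate(**model_inputs, max_new_tokens=64)

    all_scores.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))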
 