micheleriva committed · verified
Commit 26bbcaa · Parent(s): 5319897

Update README.md

Files changed (1): README.md (+33 −17)
README.md CHANGED
@@ -33,15 +33,10 @@ It understands various data types and query operators, making it versatile for d
 ## Usage

 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
+import json, torch
 from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer

-# Load the model and tokenizer
-model_name = "OramaSearch/query-translator-mini"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-
-# System Prompt used during training
 SYSTEM_PROMPT = """
 You are a tool used to generate synthetic data of Orama queries. Orama is a full-text, vector, and hybrid search engine.

@@ -76,36 +71,57 @@ The rules to generate the query are:
 - Nested properties are supported. Just translate them into dot notation. Example: `{ "where": { "author.name": "John" } }`.
 - Array of numbers are not supported.
 - Array of booleans are not supported.
+
+Return just a JSON object, nothing more.
 """

-# Example query
-query = "What are the red wines that cost less than 20 dollars?"
+QUERY = "Show me some wine reviews with a score greater than 4.5 and less than 5.0."

-# Orama schema
-schema = {
-    "name": "string",
-    "content": "string",
+SCHEMA = {
+    "title": "string",
+    "description": "string",
     "price": "number",
-    "tags": "enum[]"
 }

-# Generate structured query
+base_model_name = "Qwen/Qwen2.5-7B"
+adapter_path = "OramaSearch/query-translator-mini"
+
+print("Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+print("Loading base model...")
+model = AutoModelForCausalLM.from_pretrained(
+    base_model_name,
+    torch_dtype=torch.float16,
+    device_map="auto",
+    trust_remote_code=True,
+)
+
+print("Loading fine-tuned adapter...")
+model = PeftModel.from_pretrained(model, adapter_path)
+
+if torch.cuda.is_available():
+    model = model.cuda()
+    print(f"GPU memory after loading: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
+
 messages = [
     {"role": "system", "content": SYSTEM_PROMPT},
-    {"role": "user", "content": f"Query: {query}\nSchema: {json.dumps(schema)}"},
+    {"role": "user", "content": f"Query: {QUERY}\nSchema: {json.dumps(SCHEMA)}"},
 ]

 prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 outputs = model.generate(
     **inputs,
-    max_length=512,
+    max_new_tokens=512,
+    do_sample=True,
     temperature=0.1,
     top_p=0.9,
     num_return_sequences=1,
 )

 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(response)
 ```

 ## Training Details
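As updated, the snippet prints the full decoded sequence, prompt included. A minimal follow-up sketch, not part of the commit, for slicing off the prompt and parsing the completion as JSON, assuming the model honors the new "Return just a JSON object, nothing more." instruction; the names `tokenizer`, `inputs`, and `outputs` refer to the updated snippet above:

```python
import json

# Decode only the tokens generated after the prompt, rather than the
# whole sequence that `response` holds in the snippet above.
completion = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:],
    skip_special_tokens=True,
)

# Assumption: per the system prompt, `completion` is a single JSON object
# describing the Orama query (e.g. a `where` filter over the schema fields).
structured_query = json.loads(completion)
print(structured_query)
```

If the model emits anything besides a single JSON object, `json.loads` raises a `JSONDecodeError`, which doubles as a cheap validity check on the generation.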