# Hugging Face Spaces status: Runtime error
import os
import re

import pandas as pd
import torch
from datasets import Dataset
from huggingface_hub import login, notebook_login
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
)
# Authenticate with the Hugging Face Hub so the push_to_hub calls at the end
# of the script are authorised.
# notebook_login() renders an interactive widget and is only usable inside a
# Jupyter/Colab notebook. When the script runs non-interactively (for example
# as a Space or a plain `python` invocation), authenticate with an access
# token read from the HF_TOKEN environment variable instead, and keep the
# notebook flow as the fallback when no token is configured.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    notebook_login()

# Step 1: Load Luxury Fashion Dataset (Replace with actual dataset)
df = pd.read_csv("luxury_apparel_data.csv")  # Update with correct dataset file

# Keep only relevant columns and drop rows with any missing value among them.
df = df[["brand", "category", "description", "price"]].dropna()

# Generate one synthetic search query per row from the template
# "<brand> <category> under <price> AED". Building the list column-wise
# (instead of a row-wise DataFrame.apply) produces the same strings and also
# yields a plain empty column when no rows survive dropna().
df["query"] = [
    f"{brand} {category} under {price} AED"
    for brand, category, price in zip(df["brand"], df["category"], df["price"])
]

# Step 2: Tokenization
# The tokenizer is loaded from the same checkpoint that is fine-tuned later.
model_name = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# BIO tag set for the entities that make up each generated query. The
# pretrained checkpoint is published with its own NER tag set, so the
# classification head is re-initialised for these labels when the model is
# loaded below.
LABEL_NAMES = [
    "O",
    "B-BRAND",
    "I-BRAND",
    "B-CATEGORY",
    "I-CATEGORY",
    "B-PRICE",
    "I-PRICE",
]
LABEL2ID = {name: idx for idx, name in enumerate(LABEL_NAMES)}
ID2LABEL = {idx: name for idx, name in enumerate(LABEL_NAMES)}

# Label id skipped by the token-classification loss; it is also the value the
# data collator uses when padding label sequences.
IGNORED_LABEL_ID = -100


def _bio_tag_words(text, entity):
    """Split *text* on whitespace and tag the words as one BIO span.

    Args:
        text: Value to split; converted with ``str()`` first so numeric
            prices are handled like strings.
        entity: Entity name without prefix, e.g. ``"BRAND"``.

    Returns:
        A ``(words, tags)`` pair of equal-length lists. The first word is
        tagged ``B-<entity>`` and the remaining ones ``I-<entity>``.
    """
    words = str(text).split()
    tags = [
        f"B-{entity}" if position == 0 else f"I-{entity}"
        for position in range(len(words))
    ]
    return words, tags


def tokenize_batch(batch):
    """Tokenize a batch of rows and attach token-level NER labels.

    The words follow the template used to build the search queries
    ("<brand> <category> under <price> AED"), which is what makes it possible
    to derive the labels automatically: brand words are BRAND, category words
    are CATEGORY, the amount plus the currency are PRICE and "under" is O.

    Args:
        batch: Mapping of column name to list of values, as passed by
            ``Dataset.map(..., batched=True)``. Must contain the ``brand``,
            ``category`` and ``price`` columns.

    Returns:
        The tokenizer output with an extra ``labels`` entry: one list of
        label ids per row, aligned with ``input_ids``.
    """
    words_per_row = []
    tags_per_row = []
    for brand, category, price in zip(
        batch["brand"], batch["category"], batch["price"]
    ):
        brand_words, brand_tags = _bio_tag_words(brand, "BRAND")
        category_words, category_tags = _bio_tag_words(category, "CATEGORY")
        price_words, price_tags = _bio_tag_words(f"{price} AED", "PRICE")
        words_per_row.append(
            brand_words + category_words + ["under"] + price_words
        )
        tags_per_row.append(brand_tags + category_tags + ["O"] + price_tags)

    # No padding here: DataCollatorForTokenClassification pads inputs and
    # labels together, per training batch.
    encoded = tokenizer(words_per_row, is_split_into_words=True, truncation=True)

    labels_per_row = []
    for row_index, tags in enumerate(tags_per_row):
        row_labels = []
        previous_word = None
        for word_index in encoded.word_ids(batch_index=row_index):
            if word_index is None or word_index == previous_word:
                # Special tokens and sub-word continuations do not contribute
                # to the loss; only the first piece of each word is labelled.
                row_labels.append(IGNORED_LABEL_ID)
            else:
                row_labels.append(LABEL2ID[tags[word_index]])
            previous_word = word_index
        labels_per_row.append(row_labels)

    encoded["labels"] = labels_per_row
    return encoded


# Convert dataframe into Hugging Face dataset. Only the columns needed to
# rebuild the query words and their labels are carried over; the raw columns
# are dropped after tokenization so that only model inputs remain.
hf_dataset = Dataset.from_pandas(df[["brand", "category", "price"]])
hf_dataset = hf_dataset.map(
    tokenize_batch,
    batched=True,
    remove_columns=hf_dataset.column_names,
)

# Step 3: Fine-tune the Pretrained NER Model
# The checkpoint's classification head has a different number of labels than
# LABEL_NAMES, so it is replaced by a freshly initialised head
# (ignore_mismatched_sizes=True); the encoder weights are reused.
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(LABEL_NAMES),
    id2label=ID2LABEL,
    label2id=LABEL2ID,
    ignore_mismatched_sizes=True,
)

# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` -- confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./luxury_ner_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=500,
)

# NOTE(review): evaluation reuses the training rows, so the reported eval loss
# is not a held-out metric.
# NOTE(review): recent transformers releases prefer `processing_class=` over
# `tokenizer=` -- confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=hf_dataset,
    eval_dataset=hf_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorForTokenClassification(tokenizer),
)
trainer.train()

# Save model to Hugging Face Hub
model.push_to_hub("luxury-fashion-ner")
tokenizer.push_to_hub("luxury-fashion-ner")