DINGOLANI committed on
Commit
ed86361
·
verified ·
1 Parent(s): 754152d

Delete train.py

Browse files
Files changed (1) hide show
  1. train.py +0 -64
train.py DELETED
@@ -1,64 +0,0 @@
1
- import pandas as pd
2
- import torch
3
- import re
4
- from datasets import Dataset
5
- from transformers import (
6
- AutoModelForTokenClassification,
7
- AutoTokenizer,
8
- Trainer,
9
- TrainingArguments,
10
- DataCollatorForTokenClassification,
11
- )
12
- from huggingface_hub import notebook_login
13
-
14
- # Log in to Hugging Face Hub (make sure your Space is set to private if needed)
15
- notebook_login()
16
-
17
- # Step 1: Load Luxury Fashion Dataset (Replace with actual dataset)
18
- df = pd.read_csv("luxury_apparel_data.csv") # Update with correct dataset file
19
-
20
- # Keep only relevant columns
21
- df = df[['brand', 'category', 'description', 'price']].dropna()
22
-
23
- # Generate search queries from dataset
24
- df['query'] = df.apply(lambda x: f"{x['brand']} {x['category']} under {x['price']} AED", axis=1)
25
-
26
- # Step 2: Tokenization
27
- model_name = "dslim/bert-base-NER"
28
- tokenizer = AutoTokenizer.from_pretrained(model_name)
29
-
30
- def tokenize_batch(batch):
31
- return tokenizer(batch['query'], padding=True, truncation=True)
32
-
33
- # Convert dataframe into Hugging Face dataset
34
- hf_dataset = Dataset.from_pandas(df[['query']])
35
- hf_dataset = hf_dataset.map(tokenize_batch, batched=True)
36
-
37
- # Step 3: Fine-tune the Pretrained NER Model
38
- model = AutoModelForTokenClassification.from_pretrained(model_name)
39
-
40
- training_args = TrainingArguments(
41
- output_dir="./luxury_ner_model",
42
- evaluation_strategy="epoch",
43
- save_strategy="epoch",
44
- per_device_train_batch_size=8,
45
- per_device_eval_batch_size=8,
46
- num_train_epochs=3,
47
- logging_dir="./logs",
48
- logging_steps=500,
49
- )
50
-
51
- trainer = Trainer(
52
- model=model,
53
- args=training_args,
54
- train_dataset=hf_dataset,
55
- eval_dataset=hf_dataset,
56
- tokenizer=tokenizer,
57
- data_collator=DataCollatorForTokenClassification(tokenizer),
58
- )
59
-
60
- trainer.train()
61
-
62
- # Save model to Hugging Face Hub
63
- model.push_to_hub("luxury-fashion-ner")
64
- tokenizer.push_to_hub("luxury-fashion-ner")