class TokenizeAndAlignLabelsStep():
    # Adapted From: https://huggingface.co/docs/transformers/tasks/token_classification

    def tokenize_and_align_labels(self, examples, tokenizer):
        """Tokenize `examples` and mark the first sub-token of every word.

        Args:
            examples: input accepted by `tokenizer` — presumably a sentence or
                pre-split words; confirm against the caller.
            tokenizer: a HF *fast* tokenizer; its encoding must expose
                `.word_ids()` (only fast tokenizers do).

        Returns:
            The tokenizer encoding (padded/truncated to 128), augmented with:
              - "tokens": the input id of the first sub-token of each word.
              - "labels_mask": per-token bool — True only on a word's first
                sub-token; special tokens and continuation sub-tokens are
                False (mirrors the -100 masking convention from the HF guide).
        """
        tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128)

        # Map each token position to the index of the word it came from
        # (None for special tokens such as [CLS]/[SEP]/padding).
        word_ids = tokenized_inputs.word_ids()

        previous_word_idx = None
        tokens = []
        labels_mask = []

        for token_pos, word_idx in enumerate(word_ids):
            if word_idx is None:
                # Special token: never labeled.
                labels_mask.append(False)
            elif word_idx != previous_word_idx:
                # First sub-token of a new word: label it and keep its id.
                labels_mask.append(True)
                # BUG FIX: index input_ids by the token *position*, not by
                # word_idx — word_idx counts words, not tokens, so the
                # original grabbed the wrong ids (e.g. [CLS] for word 0).
                tokens.append(tokenized_inputs["input_ids"][token_pos])
            else:
                # Continuation sub-token of the same word: masked out.
                labels_mask.append(False)
            previous_word_idx = word_idx

        tokenized_inputs["tokens"] = tokens
        tokenized_inputs["labels_mask"] = labels_mask

        return tokenized_inputs


def main():
    """Register the custom CRF pipeline, instantiate it, and push it to the HF Hub.

    Side effects only: downloads the model, clones the Hub repo into
    ./out/pipeline, saves the pipeline there, and pushes the result.
    Requires a valid HF auth token (use_auth_token=True).
    """
    PIPELINE_REGISTRY.register_pipeline(
        "PT-BERT-Large-CRF-HAREM-Default-pipeline",
        pipeline_class=BERT_CRF_Pipeline,
        pt_model=AutoModelForTokenClassification,
    )

    classifier = pipeline(
        "PT-BERT-Large-CRF-HAREM-Default-pipeline",
        model="arubenruben/PT-BERT-Large-CRF-HAREM-Default",
        # Prefer GPU when available; fall back to CPU.
        device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
        trust_remote_code=True,
    )

    out_path = os.path.join(sys.path[0], 'out', 'pipeline')

    repo = Repository(
        out_path,
        clone_from=f"arubenruben/PT-BERT-Large-CRF-HAREM-Default",
        use_auth_token=True,
    )

    # repo.git_pull()

    classifier.save_pretrained(out_path)
    repo.push_to_hub()