Edit model card

How to use

Load model and tokenizer

from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
model = AutoModelForTokenClassification.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")

Extract NER Tag

import torch
def predict(model, tokenizer, sentence):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = tokenizer(sentence.split(),
                    is_split_into_words = True,
                    return_offsets_mapping=True, 
                    return_tensors="pt",
                    padding='max_length', 
                    truncation=True, 
                    max_length=512)
    
    model.to(device)
    # move to gpu
    ids = inputs["input_ids"].to(device)
    mask = inputs["attention_mask"].to(device)
    
    # forward pass
    outputs = model(ids, attention_mask=mask)
    logits = outputs[0]

    active_logits = logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
    flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size*seq_len,) - predictions at the token level

    tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
    token_predictions = [model.config.id2label[i] for i in flattened_predictions.cpu().numpy()]
    wp_preds = list(zip(tokens, token_predictions)) # list of tuples. Each tuple = (wordpiece, prediction)

    prediction = []
    for token_pred, mapping in zip(wp_preds, inputs["offset_mapping"].squeeze().tolist()):
        #only predictions on first word pieces are important
        if mapping[0] == 0 and mapping[1] != 0:
            prediction.append(token_pred[1])
        else:
            continue

    return sentence.split(), prediction

sentence = "BJ Habibie adalah Presiden Indonesia ke-3 yang lahir pada tanggl 25 Juni 1936"
words, labels = predict(model, tokenizer, sentence)
Downloads last month
7
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.