Edit model card

A Korean pre-trained Crypto RoBERTa model, fine-tuned on a BTC sentiment classification dataset.

For more details, see our paper "CBITS: Crypto BERT Incorporated Trading System", published in IEEE Access.

Example Use Case: BTC Sentiment Classification

# Setup for the BTC sentiment classification example.
# `torch` and `nn` are used further down in this snippet (device selection,
# `torch.no_grad()`, `nn.Softmax`), so they must be imported here.
import torch
from torch import nn
from transformers import XLMRobertaForSequenceClassification

# NOTE(review): `tokenization_roberta_spm` is not a published package as far as
# this file shows; presumably it is a module shipped alongside the model —
# confirm it is on the import path.
from tokenization_roberta_spm import FairSeqRobertaSentencePieceTokenizer

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the fine-tuned checkpoint with a 3-label classification head, put it in
# evaluation mode, and move it to the selected device.
model = XLMRobertaForSequenceClassification.from_pretrained(
    "LDKSolutions/KR-cryptoroberta-base", num_labels=3
)
model.eval()
model.to(device)

# NOTE(review): "fairseq-roberta-all-model" is presumably a local directory
# holding the SentencePiece tokenizer files — verify the path before running.
tokenizer = FairSeqRobertaSentencePieceTokenizer.from_pretrained("fairseq-roberta-all-model")

# Sample news headline (`title`) and article body (`content`) fed to the
# classifier below as a text pair.
# NOTE(review): both literals look mis-encoded in this copy (apparently Korean
# text decoded with the wrong charset) — restore them from the original source
# before relying on the model output.
title = "์šฐ์ฆˆ๋ฒก, ์™ธ๊ตญ๊ธฐ์—…์˜ ์•”ํ˜ธํ™”ํ ๊ฑฐ๋ž˜์ž๊ธˆ ๊ตญ๋‚ด๊ณ„์ขŒ ์ž…๊ธˆ ํ—ˆ์šฉ" 
content = "๋น„ํŠธ์ฝ”์ธ๋‹ท์ปด์— ๋”ฐ๋ฅด๋ฉด ์šฐ์ฆˆ๋ฒ ํ‚ค์Šคํƒ„ ์ค‘์•™์€ํ–‰์ด ์™ธ๊ตญ๊ธฐ์—…์˜ ๊ตญ๋‚ด ์€ํ–‰ ๊ณ„์ขŒ ๊ฐœ์„ค ๋ฐ ์•”ํ˜ธํ™”ํ ๊ฑฐ๋ž˜ ์ž๊ธˆ ์ž…๊ธˆ์„ ํ—ˆ์šฉํ–ˆ๋‹ค. ์•ž์„œ ์šฐ์ฆˆ๋ฒ ํ‚ค์Šคํƒ„์€ ์™ธ๊ตญ๊ธฐ์—…์˜ ์€ํ–‰ ๊ณ„์ขŒ ๊ฐœ์„ค ๋“ฑ์„ ์ œํ•œ ๋ฐ ๊ธˆ์ง€ํ•œ ๋ฐ” ์žˆ๋‹ค. ๊ฐœ์ •์•ˆ์— ๋”ฐ๋ผ ์ด๋Ÿฌํ•œ ์ž๊ธˆ์€ ์•”ํ˜ธํ™”ํ ๋งค์ž…์„ ์œ„ํ•ด ๊ฑฐ๋ž˜์†Œ๋กœ ์ด์ฒด, ํ˜น์€ ์ž๊ธˆ์ด ์œ ์ž…๋œ ๊ด€ํ• ๊ถŒ ๋‚ด ๋“ฑ๋ก๋œ ๋ฒ•์ธ ๊ณ„์ขŒ๋กœ ์ด์ฒดํ•  ์ˆ˜ ์žˆ๋‹ค. ๋‹ค๋งŒ ๊ทธ ์™ธ ๋‹ค๋ฅธ ๋ชฉ์ ์„ ์œ„ํ•œ ์‚ฌ์šฉ์€ ๊ธˆ์ง€๋œ๋‹ค. ํ•ด๋‹น ๊ฐœ์ •์•ˆ์€ ์ง€๋‚œ 2์›” 9์ผ ๋ฐœํšจ๋๋‹ค."


# Encode the headline and body as a text pair, padded/truncated to 512 tokens,
# and move the resulting tensors to the same device as the model.
encoded_input = tokenizer(
    str(title),
    str(content),
    max_length=512,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
).to(device)

# Inference only: no gradients are needed.
with torch.no_grad():
    logits = model(**encoded_input).logits
    # Normalise the logits across the label axis so they sum to 1.
    probabilities = nn.Softmax(dim=1)(logits)
    # Keep the scores of the single input as a NumPy vector.
    output = probabilities.detach().cpu().numpy()[0]
    # Print the three class scores as percentages.
    # NOTE(review): the label text in the format string is garbled in this
    # copy; presumably positive / negative / neutral — confirm label order.
    first_score, second_score, third_score = output[0] * 100, output[1] * 100, output[2] * 100
    print("ํ˜ธ์žฌ: {:.2f}% | ์•…์žฌ: {:.2f}% | ์ค‘๋ฆฝ: {:.2f}%".format(first_score, second_score, third_score))

Example Use Case: Crypto Embedding Similarity

# Setup for the embedding-similarity example.
# This snippet instantiates `AutoModel`, uses `torch` for device selection and
# `torch.no_grad()`, and calls SciPy's `cdist`, so all three must be imported.
import torch
from scipy.spatial.distance import cdist
from transformers import AutoModel, XLMRobertaForSequenceClassification

# NOTE(review): `tokenization_roberta_spm` is not a published package as far as
# this file shows; presumably it is a module shipped alongside the model —
# confirm it is on the import path.
from tokenization_roberta_spm import FairSeqRobertaSentencePieceTokenizer

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the checkpoint through `AutoModel` (no classification head requested),
# put it in evaluation mode, and move it to the selected device.
model = AutoModel.from_pretrained("LDKSolutions/KR-cryptoroberta-base")
model.eval()
model.to(device)

# NOTE(review): "fairseq-roberta-all-model" is presumably a local directory
# holding the SentencePiece tokenizer files — verify the path before running.
tokenizer = FairSeqRobertaSentencePieceTokenizer.from_pretrained("fairseq-roberta-all-model")

# Two sample news items (headline + body each) whose embeddings are compared
# below.
# NOTE(review): the literals look mis-encoded in this copy (apparently Korean
# text decoded with the wrong charset) — restore them from the original source
# before relying on the computed distance.
title1 = "USDN ๋‹ค์ค‘๋‹ด๋ณด ์ž์‚ฐ ์ „ํ™˜ ์ œ์•ˆ ํ†ต๊ณผ"
content1 = "์›จ์ด๋ธŒ ์ƒํƒœ๊ณ„ ์Šคํ…Œ์ด๋ธ”์ฝ”์ธ USDN์„ ๋‹ค์ค‘๋‹ด๋ณด ์ž์‚ฐ์œผ๋กœ ์ „ํ™˜ํ•˜๋Š” ์ œ์•ˆ ํˆฌํ‘œ๊ฐ€ ์ฐฌ์„ฑ 99%๋กœ ์˜ค๋Š˜ ํ†ต๊ณผ๋๋‹ค. ์•ž์„œ ์ฝ”์ธ๋‹ˆ์Šค๋Š” ์›จ๋ธŒ๊ฐ€ $WX,$SWOP,$VIRES,$EGG,$WEST๋ฅผ ๋‹ด๋ณด๋กœ ํ•ด USDN์„ ์›จ์ด๋ธŒ ์ƒํƒœ๊ณ„ ์ธ๋ฑ์Šค ์ž์‚ฐ์œผ๋กœ ๋งŒ๋“ค์–ด USDN ๋””ํŽ˜๊น… ์ด์Šˆ๋ฅผ ํ•ด๊ฒฐํ•  ํ”Œ๋žœ์„ ๊ณต๊ฐœํ–ˆ๋‹ค๊ณ  ์ „ํ•œ ๋ฐ” ์žˆ๋‹ค."

title2 = "์›จ์ด๋ธŒ, USDN ๊ณ ๋ž˜ ์ฒญ์‚ฐ์•ˆ ํˆฌํ‘œ ํ†ต๊ณผ๋กœ 30%โ†‘"
content2 = "์œ ํˆฌ๋ฐ์ด์— ๋”ฐ๋ฅด๋ฉด ์›จ์ด๋ธŒ(WAVES) ๊ธฐ๋ฐ˜ ์•Œ๊ณ ๋ฆฌ์ฆ˜ ์Šคํ…Œ์ด๋ธ”์ฝ”์ธ ๋‰ดํŠธ๋ฆฌ๋…ธ(USDN)์˜ ๋””ํŽ˜๊ทธ ๋ฐœ์ƒ ์—†์ด ๋Œ€๊ทœ๋ชจ USDN ํฌ์ง€์…˜ ์ฒญ์‚ฐ์„ ๊ฐ€๋Šฅํ•˜๊ฒŒ ํ•˜๋Š” ํˆฌํ‘œ๊ฐ€ ๋งŒ์žฅ์ผ์น˜๋กœ ํ†ต๊ณผ ๋จ์— ๋”ฐ๋ผ WAVES๊ฐ€ ๋ช‡์‹œ๊ฐ„ ์•ˆ์— 30%๋Œ€ ์ƒ์Šนํญ์„ ๋‚˜ํƒ€๋ƒˆ๋‹ค. ์ง€๋‚œ 28์ผ ์›จ์ด๋ธŒ ํŒ€์ด ๋ฐœํ‘œํ•œ USDN์˜ ๋‹ฌ๋Ÿฌ ํŽ˜๊ทธ ํšŒ๋ณต ๊ณ„ํš์€ ๋‹ค์Œ๊ณผ ๊ฐ™๋‹ค.- ์ปค๋ธŒ ๋ฐ CRV ํ† ํฐ์œผ๋กœ USDN ์œ ๋™์„ฑ ๊ณต๊ธ‰.- ๊ณ ๋ž˜ ๊ณ„์ขŒ๋ฅผ ์ฒญ์‚ฐ์‹œ์ผœ Vires ์œ ๋™์„ฑ ๋ณต๊ตฌ.- USDN ๋‹ด๋ณด๋ฌผ์„ ๋‘๋‹ฌ์— ๊ฑธ์ณ ์ฒœ์ฒœํžˆ ํŒ๋งค.- ๋‰ดํŠธ๋ฆฌ๋…ธ ํ”„๋กœํ† ์ฝœ ์ž๋ณธ ์กฐ๋‹ฌ์„ ์œ„ํ•œ ์ƒˆ๋กœ์šด ํ† ํฐ ๋ฐœํ–‰."

# Encode each (headline, body) pair with identical settings: padded/truncated
# to 512 tokens, returned as PyTorch tensors on the model's device.
encoded_input1 = tokenizer(
    str(title1),
    str(content1),
    max_length=512,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
).to(device)
encoded_input2 = tokenizer(
    str(title2),
    str(content2),
    max_length=512,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
).to(device)

# Forward passes only; gradients are not needed.
with torch.no_grad():
    # Element 0 of the model output is presumably the last hidden state
    # (batch, sequence, hidden) — TODO confirm against the model class.
    hidden_states1 = model(**encoded_input1)[0]
    hidden_states2 = model(**encoded_input2)[0]

# Take the vector at sequence position 0 of each input as its embedding.
emb1 = hidden_states1[:, 0, :].detach().cpu().numpy()
emb2 = hidden_states2[:, 0, :].detach().cpu().numpy()

# Pairwise cosine distances between the two embedding sets; keep the first
# row. Despite the variable name, the values are distances (smaller = closer).
sim_scores = cdist(emb1, emb2, "cosine")[0]
print(f"cosine distance = {sim_scores[0]}")
Downloads last month
36
Inference API
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.