File size: 2,345 Bytes
3c9a94a
 
 
 
 
 
 
 
 
 
d1231be
3c9a94a
 
 
 
 
ae5bfc1
3c9a94a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1231be
 
 
 
 
 
 
 
 
 
 
 
 
 
3c9a94a
 
d1231be
 
 
a1e9365
3c9a94a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import time
import csv
import datetime
import gradio
import schedule
from gradio import utils
import huggingface_hub
from pathlib import Path
from src.utils.utilities import Utility
from transformers import RobertaTokenizer, T5ForConditionalGeneration

# Local directory where prediction logs are stored before syncing to the Hub.
dataset_dir = "logs"
# Column order for the CSV log written by log_record().
headers = ["input", "output", "timestamp", "elapsed"]

# Fix: the original passed an undefined name `hf_token` (NameError at import
# time). Read the token from the environment instead — the standard way HF
# Spaces exposes secrets.
repo = huggingface_hub.Repository(
    local_dir=dataset_dir,
    clone_from="https://huggingface.co/spaces/priyasaravana/CodeSummarization",
    token=os.getenv("HF_TOKEN"),
)
# Start from the latest remote state so later appends/pushes don't conflict.
repo.git_pull(lfs=True)

def log_record(vals):
    """Append one prediction record to the CSV log, then run pending sync jobs.

    Args:
        vals: Sequence matching the module-level ``headers`` order:
            [input, output, timestamp, elapsed].
    """
    log_file = Path(dataset_dir) / "data.csv"
    # Write the header row only when the file is first created.
    # (log_file is already a Path — the original's extra Path() wrap was redundant.)
    is_new = not log_file.exists()
    with open(log_file, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        if is_new:
            writer.writerow(utils.sanitize_list_for_csv(headers))
        writer.writerow(utils.sanitize_list_for_csv(vals))
    # Piggy-back the periodic repo sync on logging activity; `job` is the
    # module-level schedule job defined after this function.
    schedule.run_pending()
    print(f"Last Sync: {job.last_run}")

def evaluate(sentence):
    """Generate a natural-language summary for a code snippet with CodeT5.

    Args:
        sentence: The code snippet to summarize — a plain string, or a
            single-element list/tuple (predict() passes ``[sentence]``).

    Returns:
        The decoded summary string (generation capped at 20 tokens).
    """
    # NOTE(review): reloading the tokenizer/model on every call is slow;
    # consider hoisting to module level if latency matters.
    tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')
    model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base-multi-sum')

    # Fix: the original referenced an undefined name `code_snippet`
    # (NameError). Use the actual parameter, unwrapping the one-element
    # list that predict() passes in.
    if isinstance(sentence, (list, tuple)):
        sentence = sentence[0]
    input_text = sentence.strip()
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    # Greedy decode; max_length bounds the summary length.
    generated_ids = model.generate(input_ids, max_length=20)
    summary = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    return summary

def predict(sentence):
    """Summarize *sentence*, log the interaction to CSV, and return the summary."""
    started_at = datetime.datetime.now().isoformat()
    t0 = time.time()
    summary = evaluate([sentence])
    duration = time.time() - t0
    print(f"Sentence: {sentence} \nPrediction: {summary}")
    log_record([sentence, summary, started_at, str(duration)])
    return summary

def sync_logs():
    """Commit and push any new log rows to the remote Space repository."""
    print(f"Repo Clean: {repo.is_repo_clean()}")
    if repo.is_repo_clean():
        return
    repo.git_add()
    repo.git_commit()
    # Pull before pushing so a diverged remote doesn't reject the push.
    repo.git_pull(lfs=True)
    result = repo.git_push()
    print(result)
    
# Register a sync every 5 minutes; it actually fires when log_record()
# calls schedule.run_pending().
job = schedule.every(5).minutes.do(sync_logs)
print("Scheduler engaged")

# Minimal text-in/text-out UI; flagging disabled since logging is manual.
demo = gradio.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    allow_flagging='never',
)
demo.launch()