File size: 36,529 Bytes
26f0d6d
 
 
 
 
 
57ebbd2
26f0d6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57ebbd2
26f0d6d
 
68f38dd
26f0d6d
 
68f38dd
26f0d6d
 
 
 
 
57ebbd2
26f0d6d
57ebbd2
 
 
e2e58ad
57ebbd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import fitz
import os

@st.cache_resource(show_spinner=False)
def _load_model_and_tokenizer():
    """Load the classifier and its tokenizer once and reuse them across reruns.

    Streamlit re-executes this script from top to bottom on every user
    interaction; without caching, both ``from_pretrained`` calls would be
    repeated on each rerun.  ``show_spinner=False`` keeps the cached call from
    emitting a UI element before ``st.set_page_config`` is called further down.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Weights come from the fine-tuned checkpoint; the tokenizer is taken from
    # the base Longformer checkpoint.
    loaded_model = AutoModelForSequenceClassification.from_pretrained("Reem333/Longformer")
    loaded_tokenizer = AutoTokenizer.from_pretrained("allenai/longformer-base-4096")
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer()

def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Location of the PDF; passed straight to ``fitz.open``.

    Returns:
        One string made of each page's ``get_text()`` output joined without a
        separator.  A document with no pages yields ``''``.
    """
    with fitz.open(file_path) as pdf_document:
        # str.join assembles the result in a single pass instead of growing a
        # string with ``+=`` once per page.  The generator is fully consumed
        # here, while the document is still open.
        return "".join(
            pdf_document.load_page(page_number).get_text()
            for page_number in range(pdf_document.page_count)
        )

def predict_class(text):
    """Predict the class index for a piece of text.

    Args:
        text: Raw text to classify.

    Returns:
        The index of the highest-scoring logit as an ``int``, or ``None`` when
        tokenization or inference fails (the error is reported through
        ``st.error``).
    """
    try:
        # The limit is a *token* budget and is enforced by the tokenizer via
        # ``truncation=True``.  The text is deliberately not sliced by
        # characters first: a 4096-character prefix is usually far shorter
        # than 4096 tokens and would throw away input the model can still read.
        max_length = 4096

        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(**inputs).logits
        return torch.argmax(logits, dim=1).item()
    except Exception as e:
        # UI boundary: show the failure on the page instead of aborting the
        # script run.
        st.error(f"Error during prediction: {e}")
        return None

# Hex colour for each predicted class index, listed in order:
# Level 1, Level 2, Level 3, Level 4.
class_colors = dict(
    enumerate(("#d62728", "#ff7f0e", "#2ca02c", "#1f77b4"))
)

# Directory named for uploaded files; created at startup, and an already
# existing directory is not an error.
uploaded_files_dir = "uploaded_files"
os.makedirs(uploaded_files_dir, exist_ok=True)

# Browser-tab title and icon for the page.
st.set_page_config(page_title="Paper Citation Classifier", page_icon="logo.png")

# Sidebar: logo, app name, a short description and the class legend.
with st.sidebar:
    st.image("logo.png", width=70)
    st.markdown('<div style="position: absolute; left: 5px;"></div>', unsafe_allow_html=True)

    st.markdown("# Paper Citation Classifier")
    st.markdown("---")
    st.markdown("## About")
    st.markdown('''
    This is a tool to classify paper citations into different levels based on their number of citations.
    Powered by Fine-Tuned [Longformer model](https://huggingface.co/Reem333/Longformer) with custom data.
    ''')
    st.markdown("### Class Levels:")
    # One markdown element per level, emitted in ascending order.
    for level_entry in (
        "- Level 1: Low cited papers",
        "- Level 2: Average cited papers",
        "- Level 3: More cited papers",
        "- Level 4: Highly cited papers",
    ):
        st.markdown(level_entry)
    st.markdown("---")
    st.markdown('Tabuk University')

# Main-area heading.
st.title("Check Your Paper Now!")

# NOTE: the Text/PDF input-type selector below is commented out, so the text
# fields that follow are always rendered.
#option = st.radio("Select input type:", ("Text", "PDF"))

#if option == "Text":
# Multi-line text boxes for the paper's title and abstract.
title_input = st.text_area("Enter Title:")
abstract_input = st.text_area("Enter Abstract:")
    
# Multi-line text boxes for the paper's affiliations and keywords.
affiliations_input = st.text_area("Enter Affiliations:")
keywords_input = st.text_area("Enter Keywords:")
options=['Environmental Sciences; Remote Sensing', 'Construction & Building Technology; Energy & Fuels; Engineering, Civil', 'Energy & Fuels', 'Chemistry, Physical; Materials Science, Multidisciplinary; Metallurgy & Metallurgical Engineering', 'Multidisciplinary Sciences', 'Nursing', 'Chemistry, Applied; Food Science & Technology; Nutrition & Dietetics', 'Medicine, General & Internal', 'Computer Science, Artificial Intelligence; Computer Science, Software Engineering; Computer Science, Theory & Methods; Engineering, Electrical & Electronic; Optics', 'Pharmacology & Pharmacy; Toxicology', 'Surgery', 'Biodiversity Conservation; Environmental Sciences', 'Engineering, Chemical', 'Infectious Diseases; Microbiology', 'Neurosciences', 'Environmental Sciences; Meteorology & Atmospheric Sciences', 'Engineering, Environmental; Environmental Sciences', 'Health Care Sciences & Services; Public, Environmental & Occupational Health', 'Pharmacology & Pharmacy', 'Agriculture, Multidisciplinary', 'Environmental Sciences', 'Cardiac & Cardiovascular Systems', 'Endocrinology & Metabolism', 'Environmental Studies', 'Forestry', 'Behavioral Sciences; Nutrition & Dietetics', 'Computer Science, Information Systems; Health Care Sciences & Services; Medical Informatics', 'Computer Science, Software Engineering; Computer Science, Theory & Methods', 'Geochemistry & Geophysics', 'Parasitology; Tropical Medicine', 'Fisheries', 'Thermodynamics', 'Environmental Sciences; Public, Environmental & Occupational Health', 'Business', 'Plant Sciences; Chemistry, Medicinal; Integrative & Complementary Medicine; Pharmacology & Pharmacy', 'Medicine, Research & Experimental; Pharmacology & Pharmacy', 'Engineering, Multidisciplinary; Mathematics, Interdisciplinary Applications; Mechanics', 'Materials Science, Coatings & Films; Physics, Applied', 'Oncology; Radiology, Nuclear Medicine & Medical Imaging', 'Food Science & Technology', 'Biochemistry & Molecular Biology; Biotechnology & Applied Microbiology', 
'Engineering, Mechanical; Materials Science, Characterization & Testing', 'Biology; Mathematical & Computational Biology', 'Chemistry, Multidisciplinary', 'Public, Environmental & Occupational Health; Infectious Diseases', 'Nutrition & Dietetics', 'Mathematics, Applied; Mathematics, Interdisciplinary Applications; Mechanics; Physics, Fluids & Plasmas; Physics, Mathematical', 'Medicine, Legal; Social Sciences, Biomedical', 'Biochemistry & Molecular Biology', 'Infectious Diseases', 'Spectroscopy', 'Agriculture, Dairy & Animal Science', 'Business, Finance', 'Psychology, Developmental; Neurosciences', 'Public, Environmental & Occupational Health', 'Thermodynamics; Engineering, Mechanical; Mechanics', 'Ecology; Environmental Sciences', 'Geochemistry & Geophysics; Meteorology & Atmospheric Sciences', 'Oncology; Urology & Nephrology', 'Geography, Physical; Geosciences, Multidisciplinary; Remote Sensing; Imaging Science & Photographic Technology', 'Psychiatry', 'Chemistry, Physical; Materials Science, Coatings & Films; Physics, Applied; Physics, Condensed Matter', 'Computer Science, Interdisciplinary Applications; Education & Educational Research', 'Environmental Studies; International Relations', 'Nuclear Science & Technology', 'Computer Science, Software Engineering', 'Radiology, Nuclear Medicine & Medical Imaging', 'Infectious Diseases; Pharmacology & Pharmacy', 'Surgery; Transplantation', 'Business, Finance; Economics', 'Clinical Neurology', 'Development Studies; Environmental Studies; Regional & Urban Planning; Urban Studies', 'Materials Science, Multidisciplinary; Metallurgy & Metallurgical Engineering', 'Automation & Control Systems; Computer Science, Artificial Intelligence; Robotics', 'Urology & Nephrology', 'Biochemistry & Molecular Biology; Immunology', 'Green & Sustainable Science & Technology; Engineering, Environmental; Environmental Sciences', 'Statistics & Probability', 'Cell & Tissue Engineering; Biotechnology & Applied Microbiology; Cell Biology', 
'Materials Science, Multidisciplinary; Physics, Multidisciplinary', 'Food Science & Technology; Nutrition & Dietetics', 'Engineering, Industrial; Ergonomics; Psychology, Applied', 'Ophthalmology', 'Marine & Freshwater Biology; Toxicology', 'Neurosciences; Peripheral Vascular Disease', 'Energy & Fuels; Engineering, Chemical', 'Materials Science, Characterization & Testing; Polymer Science', 'Geology; Mineralogy; Mining & Mineral Processing', 'Behavioral Sciences; Neurosciences', 'Computer Science, Artificial Intelligence; Neurosciences', 'Obstetrics & Gynecology', 'Acoustics; Chemistry, Multidisciplinary', 'Instruments & Instrumentation; Nuclear Science & Technology; Physics, Nuclear; Physics, Particles & Fields', 'Environmental Sciences; Toxicology', 'Engineering, Electrical & Electronic; Instruments & Instrumentation', 'Architecture', 'Soil Science', 'Economics', 'Metallurgy & Metallurgical Engineering', 'Orthopedics', 'Biochemistry & Molecular Biology; Endocrinology & Metabolism; Pharmacology & Pharmacy', 'Biochemical Research Methods; Biochemistry & Molecular Biology; Chemistry, Analytical', 'Hematology; Peripheral Vascular Disease', 'Mechanics; Materials Science, Composites', 'Computer Science, Artificial Intelligence; Engineering, Electrical & Electronic; Operations Research & Management Science', 'Thermodynamics; Energy & Fuels', 'Agriculture, Multidisciplinary; Food Science & Technology', 'Dentistry, Oral Surgery & Medicine', 'Energy & Fuels; Geosciences, Multidisciplinary', 'Meteorology & Atmospheric Sciences', 'Neurosciences; Neuroimaging; Radiology, Nuclear Medicine & Medical Imaging', 'Surgery; Peripheral Vascular Disease', 'Economics; Regional & Urban Planning', 'Agricultural Engineering; Agronomy', 'Geography; Regional & Urban Planning', 'Allergy; Immunology', 'Biochemistry & Molecular Biology; Biophysics; Cell Biology', 'Engineering, Biomedical; Materials Science, Biomaterials', 'Chemistry, Physical; Physics, Atomic, Molecular & Chemical', 
'Nanoscience & Nanotechnology; Physics, Condensed Matter', 'Engineering, Chemical; Engineering, Mechanical; Environmental Sciences; Meteorology & Atmospheric Sciences', 'Acoustics; Radiology, Nuclear Medicine & Medical Imaging', 'Health Care Sciences & Services; Medicine, General & Internal; Clinical Neurology', 'Materials Science, Multidisciplinary', 'Construction & Building Technology; Engineering, Civil', 'Chemistry, Physical', 'Immunology; Medicine, Research & Experimental', 'Chemistry, Applied; Chemistry, Physical; Engineering, Chemical', 'Mathematics, Applied', 'Cardiac & Cardiovascular Systems; Peripheral Vascular Disease', 'Environmental Sciences; Marine & Freshwater Biology', 'Physics, Multidisciplinary', 'Biology; Physiology', 'Materials Science, Biomaterials', 'Environmental Sciences; Environmental Studies; Meteorology & Atmospheric Sciences', 'Green & Sustainable Science & Technology; Energy & Fuels', 'Chemistry, Analytical; Electrochemistry; Instruments & Instrumentation', 'Agricultural Engineering; Biotechnology & Applied Microbiology; Energy & Fuels', 'Hospitality, Leisure, Sport & Tourism; Sociology', 'Computer Science, Artificial Intelligence', 'Agronomy; Plant Sciences', 'Demography; Economics; Gerontology', 'Business; Economics; Management', 'Computer Science, Theory & Methods', 'Water Resources', 'Substance Abuse; Psychiatry', 'Business; Regional & Urban Planning', 'Optics', 'Psychology, Educational', 'Economics; Health Care Sciences & Services; Health Policy & Services', 'Psychology, Experimental', 'Computer Science, Interdisciplinary Applications; Information Science & Library Science', 'Mechanics', 'Environmental Sciences; Environmental Studies', 'Chemistry, Analytical', 'Gastroenterology & Hepatology', 'Geography, Physical; Geosciences, Multidisciplinary', 'Clinical Neurology; Pediatrics', 'Immunology; Neurosciences; Psychiatry', 'Parasitology', 'Engineering, Chemical; Polymer Science', 'Infectious Diseases; Microbiology; Respiratory 
System', 'Construction & Building Technology; Engineering, Civil; Materials Science, Multidisciplinary', 'Rehabilitation; Sport Sciences', 'Engineering, Mechanical', 'Engineering, Marine; Engineering, Civil; Engineering, Ocean; Oceanography', 'Engineering, Multidisciplinary', 'Neuroimaging', 'Pediatrics', 'Automation & Control Systems; Operations Research & Management Science', 'Cardiac & Cardiovascular Systems; Pathology', 'Behavioral Sciences; Neurosciences; Psychology, Experimental', 'Emergency Medicine', 'Nanoscience & Nanotechnology; Materials Science, Multidisciplinary; Metallurgy & Metallurgical Engineering', 'Virology', 'Archaeology', 'Marine & Freshwater Biology', 'Obstetrics & Gynecology; Reproductive Biology', 'Geosciences, Multidisciplinary', 'Management', 'Endocrinology & Metabolism; Neurosciences; Psychiatry', 'Biology; Computer Science, Interdisciplinary Applications; Engineering, Biomedical; Mathematical & Computational Biology', 'Medicine, General & Internal; Medicine, Research & Experimental', 'Information Science & Library Science', 'Ecology; Parasitology', 'Engineering, Civil', 'Engineering, Industrial; Operations Research & Management Science', 'Critical Care Medicine; Emergency Medicine', 'Nanoscience & Nanotechnology; Materials Science, Multidisciplinary', 'Biochemical Research Methods', 'Ergonomics; Public, Environmental & Occupational Health; Social Sciences, Interdisciplinary; Transportation', 'Engineering, Mechanical; Materials Science, Multidisciplinary', 'Computer Science, Information Systems; Telecommunications', 'Hematology; Immunology; Transplantation', 'Genetics & Heredity', 'Materials Science, Multidisciplinary; Physics, Applied', 'Astronomy & Astrophysics; Physics, Nuclear; Physics, Particles & Fields', 'Ecology; Economics; Environmental Sciences; Environmental Studies', 'Environmental Studies; Transportation; Transportation Science & Technology', 'Education & Educational Research', 'Rehabilitation', 'Biotechnology & Applied 
Microbiology; Chemistry, Analytical; Electrochemistry; Nanoscience & Nanotechnology; Instruments & Instrumentation', 'Engineering, Environmental; Engineering, Chemical', 'Public, Environmental & Occupational Health; Social Sciences, Biomedical', 'Pathology', 'Reproductive Biology; Veterinary Sciences', 'Toxicology', 'Immunology', 'Chemistry, Applied; Food Science & Technology', 'Public, Environmental & Occupational Health; Pharmacology & Pharmacy', 'Entomology', 'Education & Educational Research; Psychology, Educational', 'Computer Science, Artificial Intelligence; Computer Science, Interdisciplinary Applications', 'Obstetrics & Gynecology; Peripheral Vascular Disease', 'Biochemistry & Molecular Biology; Biophysics', 'Construction & Building Technology; Engineering, Environmental; Engineering, Civil', 'Neurosciences; Physiology; Rehabilitation; Sport Sciences', 'Clinical Neurology; Psychiatry', 'Psychology, Social', 'Geography', 'Biotechnology & Applied Microbiology; Genetics & Heredity; Toxicology', 'Green & Sustainable Science & Technology; Environmental Sciences', 'Psychology, Biological; Behavioral Sciences; Psychology; Psychology, Experimental', 'Automation & Control Systems; Chemistry, Analytical; Computer Science, Artificial Intelligence; Instruments & Instrumentation; Mathematics, Interdisciplinary Applications; Statistics & Probability', 'Engineering, Aerospace', 'Food Science & Technology; Microbiology', 'Clinical Neurology; Neurosciences', 'Computer Science, Artificial Intelligence; Engineering, Electrical & Electronic', 'Agriculture, Dairy & Animal Science; Veterinary Sciences', 'Veterinary Sciences', 'Otorhinolaryngology', 'Ecology; Environmental Sciences; Environmental Studies', 'Engineering, Manufacturing', 'Social Sciences, Interdisciplinary', 'Integrative & Complementary Medicine', 'Computer Science, Interdisciplinary Applications; Computer Science, Theory & Methods', 'Economics; Transportation', 'Oceanography', 'Marine & Freshwater Biology; 
Oceanography', 'Biochemistry & Molecular Biology; Chemistry, Organic', 'Neurosciences; Pharmacology & Pharmacy', 'Computer Science, Artificial Intelligence; Engineering, Multidisciplinary', 'Chemistry, Physical; Materials Science, Multidisciplinary', 'Materials Science, Multidisciplinary; Nuclear Science & Technology', 'Engineering, Electrical & Electronic; Materials Science, Multidisciplinary; Physics, Applied; Physics, Condensed Matter', 'Peripheral Vascular Disease', 'Medicine, Legal; Pharmacology & Pharmacy; Toxicology', 'Agronomy; Forestry; Meteorology & Atmospheric Sciences', 'Computer Science, Interdisciplinary Applications; Engineering, Industrial; Operations Research & Management Science', 'Economics; Energy & Fuels; Environmental Sciences; Environmental Studies', 'Oncology', 'Biochemistry & Molecular Biology; Entomology', 'Chemistry, Physical; Nanoscience & Nanotechnology; Materials Science, Multidisciplinary', 'Engineering, Mechanical; Mechanics', 'Microbiology', 'Geosciences, Multidisciplinary; Meteorology & Atmospheric Sciences; Water Resources', 'Ecology; Mycology', 'Computer Science, Information Systems', 'Agronomy; Food Science & Technology; Horticulture', 'Oncology; Public, Environmental & Occupational Health', 'Biochemical Research Methods; Biotechnology & Applied Microbiology; Virology', 'Acoustics', 'Horticulture', 'Green & Sustainable Science & Technology; Geography, Physical', 'Biotechnology & Applied Microbiology', 'Chemistry, Applied; Chemistry, Physical; Nanoscience & Nanotechnology; Materials Science, Multidisciplinary', 'Business; Communication', 'Engineering, Chemical; Food Science & Technology', 'Neurosciences; Pharmacology & Pharmacy; Toxicology', 'Respiratory System', 'Oncology; Cardiac & Cardiovascular Systems', 'Parasitology; Pharmacology & Pharmacy', 'Microscopy', 'History & Philosophy Of Science', 'Anthropology; Archaeology; Geosciences, Multidisciplinary', 'Environmental Sciences; Remote Sensing; Imaging Science & Photographic 
Technology', 'Orthopedics; Sport Sciences; Surgery', 'Agriculture, Dairy & Animal Science; Behavioral Sciences; Veterinary Sciences', 'Family Studies; Psychology, Social; Social Work', 'Chemistry, Analytical; Electrochemistry', 'Business; Management', 'Hospitality, Leisure, Sport & Tourism; Psychology, Applied; Psychology; Sport Sciences', 'Engineering, Aerospace; Astronomy & Astrophysics; Geosciences, Multidisciplinary; Meteorology & Atmospheric Sciences', 'Mathematics', 'Oncology; Respiratory System', 'Computer Science, Interdisciplinary Applications; Computer Science, Theory & Methods; Engineering, Biomedical; Medical Informatics', 'Engineering, Manufacturing; Materials Science, Multidisciplinary', 'Anatomy & Morphology', 'Biochemistry & Molecular Biology; Chemistry, Applied; Polymer Science', 'Computer Science, Interdisciplinary Applications; Physics, Mathematical', 'Computer Science, Theory & Methods; Mathematics, Applied', 'Astronomy & Astrophysics', 'Instruments & Instrumentation; Optics; Physics, Applied', 'Primary Health Care; Medicine, General & Internal', 'Geriatrics & Gerontology; Neurosciences', 'Materials Science, Multidisciplinary; Physics, Condensed Matter', 'Psychology, Clinical; Health Care Sciences & Services; Medical Informatics; Psychiatry', 'Geosciences, Multidisciplinary; Oceanography', 'Agricultural Engineering; Agriculture, Multidisciplinary', 'Engineering, Multidisciplinary; Instruments & Instrumentation', 'Cell Biology', 'Biochemistry & Molecular Biology; Cell Biology', 'Computer Science, Interdisciplinary Applications; Medical Informatics', 'Fisheries; Marine & Freshwater Biology', 'Agronomy', 'Immunology; Pharmacology & Pharmacy', 'Construction & Building Technology; Materials Science, Composites', 'Chemistry, Medicinal; Pharmacology & Pharmacy', 'Chemistry, Applied; Materials Science, Coatings & Films', 'Geography; Political Science', 'Computer Science, Interdisciplinary Applications; Engineering, Environmental; Environmental Studies; 
Geography; Operations Research & Management Science; Regional & Urban Planning', 'Oncology; Obstetrics & Gynecology', 'Substance Abuse', 'Materials Science, Multidisciplinary; Metallurgy & Metallurgical Engineering; Materials Science, Characterization & Testing', 'Biotechnology & Applied Microbiology; Food Science & Technology; Microbiology', 'Anesthesiology', 'Chemistry, Physical; Engineering, Environmental; Engineering, Chemical', 'Chemistry, Physical; Nuclear Science & Technology; Physics, Atomic, Molecular & Chemical', 'Agriculture, Dairy & Animal Science; Food Science & Technology', 'Mathematics, Applied; Mathematics', 'Medicine, Research & Experimental', 'Mathematics, Interdisciplinary Applications; Physics, Multidisciplinary; Physics, Mathematical', 'Environmental Studies; Psychology, Multidisciplinary', 'Electrochemistry', 'Ecology', 'Environmental Sciences; Environmental Studies; Geography', 'Polymer Science', 'Immunology; Microbiology; Respiratory System', 'Biochemistry & Molecular Biology; Pharmacology & Pharmacy; Toxicology', 'Chemistry, Medicinal', 'Construction & Building Technology; Green & Sustainable Science & Technology; Energy & Fuels', 'Economics; Law', 'Education & Educational Research; Linguistics; Language & Linguistics', 'Economics; Public, Environmental & Occupational Health', 'Biochemical Research Methods; Chemistry, Analytical', 'Materials Science, Multidisciplinary; Mechanics; Physics, Condensed Matter', 'Automation & Control Systems; Engineering, Electrical & Electronic; Engineering, Mechanical; Robotics', 'Thermodynamics; Energy & Fuels; Engineering, Mechanical; Mechanics', 'Materials Science, Composites', 'Social Issues; Social Sciences, Interdisciplinary', 'Biotechnology & Applied Microbiology; Microbiology', 'Computer Science, Interdisciplinary Applications; Engineering, Environmental; Environmental Sciences; Water Resources', 'Biochemical Research Methods; Microbiology', 'Medicine, General & Internal; Urology & Nephrology', 
'Behavioral Sciences; Clinical Neurology; Psychiatry', 'Chemistry, Physical; Physics, Condensed Matter', 'Chemistry, Medicinal; Chemistry, Multidisciplinary; Pharmacology & Pharmacy', 'Chemistry, Applied; Chemistry, Organic; Polymer Science', 'Biology', 'Environmental Sciences; Limnology; Marine & Freshwater Biology', 'Endocrinology & Metabolism; Primary Health Care', 'Chemistry, Physical; Electrochemistry; Energy & Fuels', 'Engineering, Electrical & Electronic; Instruments & Instrumentation; Materials Science, Multidisciplinary', 'Engineering, Industrial; Engineering, Manufacturing; Materials Science, Multidisciplinary', 'Medical Laboratory Technology', 'Cell Biology; Geriatrics & Gerontology', 'Biotechnology & Applied Microbiology; Genetics & Heredity', 'Biochemical Research Methods; Biochemistry & Molecular Biology', 'Chemistry, Physical; Electrochemistry; Energy & Fuels; Materials Science, Multidisciplinary', 'Acoustics; Engineering, Biomedical; Instruments & Instrumentation; Radiology, Nuclear Medicine & Medical Imaging', 'Computer Science, Artificial Intelligence; Computer Science, Information Systems; Computer Science, Software Engineering', 'Biochemistry & Molecular Biology; Parasitology', 'Thermodynamics; Chemistry, Physical; Engineering, Chemical', 'Orthopedics; Rheumatology', 'Biochemistry & Molecular Biology; Endocrinology & Metabolism', 'Geosciences, Multidisciplinary; Soil Science; Water Resources', 'Thermodynamics; Engineering, Mechanical', 'Business; Management; Transportation', 'Education & Educational Research; Linguistics', 'Psychology, Multidisciplinary; Psychology, Experimental', 'Computer Science, Interdisciplinary Applications', 'Computer Science, Theory & Methods; Logic', 'Engineering, Geological; Mining & Mineral Processing', 'Mathematics, Applied; Mathematics; Physics, Mathematical', 'Developmental Biology', 'Psychology, Social; Social Sciences, Interdisciplinary; Sociology', 'Mathematical & Computational Biology; Infectious Diseases', 
'Biotechnology & Applied Microbiology; Engineering, Environmental; Environmental Sciences', 'Food Science & Technology; Toxicology', 'Acoustics; Computer Science, Interdisciplinary Applications', 'Engineering, Civil; Geosciences, Multidisciplinary; Water Resources', 'Sociology', 'Materials Science, Ceramics', 'Biophysics; Biotechnology & Applied Microbiology; Chemistry, Analytical; Electrochemistry; Nanoscience & Nanotechnology', 'Biochemistry & Molecular Biology; Evolutionary Biology; Genetics & Heredity', 'Thermodynamics; Energy & Fuels; Mechanics', 'Engineering, Environmental; Environmental Sciences; Water Resources', 'Rheumatology', 'Cardiac & Cardiovascular Systems; Pediatrics', 'Public, Environmental & Occupational Health; Medicine, General & Internal', 'Psychology, Developmental', 'Energy & Fuels; Engineering, Petroleum', 'Clinical Neurology; Neuroimaging; Psychiatry', 'Environmental Sciences; Pharmacology & Pharmacy; Toxicology', 'Chemistry, Applied; Energy & Fuels; Engineering, Chemical', 'Oncology; Cell Biology', 'Biochemical Research Methods; Biotechnology & Applied Microbiology; Chemistry, Analytical', 'Infectious Diseases; Microbiology; Pharmacology & Pharmacy', 'Engineering, Biomedical', 'Biochemistry & Molecular Biology; Plant Sciences', 'Obstetrics & Gynecology; Pediatrics', 'Automation & Control Systems; Engineering, Multidisciplinary; Instruments & Instrumentation', 'Economics; Management', 'Computer Science, Interdisciplinary Applications; Engineering, Geological; Geosciences, Multidisciplinary', 'Mycology', 'Materials Science, Multidisciplinary; Materials Science, Coatings & Films; Physics, Applied; Physics, Condensed Matter', 'Operations Research & Management Science', 'Operations Research & Management Science; Transportation Science & Technology', 'Fisheries; Immunology; Marine & Freshwater Biology; Veterinary Sciences', 'Immunology; Neurosciences', 'Plant Sciences', 'Political Science', 'Agronomy; Water Resources', 'Hospitality, Leisure, 
Sport & Tourism', 'Oncology; Hematology', 'Biochemistry & Molecular Biology; Cell Biology; Immunology', 'Information Science & Library Science; Management', 'Engineering, Industrial; Ergonomics', 'Engineering, Environmental; Engineering, Civil; Geosciences, Multidisciplinary', 'Agricultural Economics & Policy; Economics; Food Science & Technology; Nutrition & Dietetics', 'Biochemistry & Molecular Biology; Nutrition & Dietetics', 'Immunology; Microbiology', 'Literature; Sociology', 'Parasitology; Veterinary Sciences', 'Agriculture, Multidisciplinary; Ecology; Environmental Sciences', 'Business; Economics; Environmental Studies', 'Engineering, Civil; Engineering, Ocean', 'Cell Biology; Endocrinology & Metabolism', 'Biochemistry & Molecular Biology; Neurosciences', 'Agriculture, Dairy & Animal Science; Endocrinology & Metabolism', 'Computer Science, Artificial Intelligence; Engineering, Biomedical; Medical Informatics', 'Biophysics; Engineering, Biomedical', 'Chemistry, Applied; Engineering, Chemical; Materials Science, Textiles', 'Computer Science, Information Systems; Engineering, Electrical & Electronic; Telecommunications', 'Audiology & Speech-Language Pathology; Otorhinolaryngology', 'Environmental Sciences; Nanoscience & Nanotechnology', 'Cardiac & Cardiovascular Systems; Cell Biology', 'Chemistry, Multidisciplinary; Materials Science, Multidisciplinary; Polymer Science', 'Engineering, Chemical; Materials Science, Multidisciplinary', 'Optics; Spectroscopy', 'Health Policy & Services', 'Engineering, Electrical & Electronic', 'Microbiology; Veterinary Sciences', 'Environmental Sciences; Food Science & Technology; Microbiology', 'Geography, Physical; Geosciences, Multidisciplinary; Paleontology', 'Biochemical Research Methods; Biotechnology & Applied Microbiology', 'Engineering, Chemical; Mineralogy; Mining & Mineral Processing', 'Neurosciences; Ophthalmology; Psychology', 'Agricultural Engineering; Fisheries', 'Astronomy & Astrophysics; Physics, Particles & 
Fields', 'Geriatrics & Gerontology', 'Economics; Geography; Transportation', 'Critical Care Medicine', 'Orthopedics; Sport Sciences', 'Biodiversity Conservation; Ecology; Environmental Sciences', 'Engineering, Multidisciplinary; Materials Science, Composites', 'Oceanography; Water Resources', 'Biodiversity Conservation; Ecology', 'Criminology & Penology; Psychology, Multidisciplinary', 'Psychology, Clinical; Psychiatry', 'Biotechnology & Applied Microbiology; Engineering, Chemical; Food Science & Technology', 'Engineering, Multidisciplinary; Engineering, Manufacturing', 'Engineering, Manufacturing; Materials Science, Composites', 'Computer Science, Information Systems; Information Science & Library Science', 'Energy & Fuels; Environmental Sciences; Environmental Studies', 'Plant Sciences; Environmental Studies; Forestry; Urban Studies', 'Dermatology', 'Education, Scientific Disciplines; Nutrition & Dietetics', 'Thermodynamics; Chemistry, Analytical; Chemistry, Physical', 'Materials Science, Multidisciplinary; Multidisciplinary Sciences', 'Biochemical Research Methods; Physics, Atomic, Molecular & Chemical; Spectroscopy', 'Engineering, Industrial; Engineering, Manufacturing; Operations Research & Management Science', 'Plant Sciences; Environmental Sciences', 'Computer Science, Cybernetics; Ergonomics; Psychology, Multidisciplinary', 'Psychology, Applied; Transportation', 'Economics; Engineering, Civil; Operations Research & Management Science; Transportation; Transportation Science & Technology', 'Engineering, Geological; Geosciences, Multidisciplinary', 'Management; Operations Research & Management Science', 'Education, Special; Rehabilitation', 'Computer Science, Hardware & Architecture; Computer Science, Theory & Methods', 'Biochemistry & Molecular Biology; Biology; Biophysics; Electrochemistry', 'Medicine, Research & Experimental; Clinical Neurology', 'Automation & Control Systems; Engineering, Electrical & Electronic', 'Cardiac & Cardiovascular Systems; 
Endocrinology & Metabolism; Nutrition & Dietetics', 'Biochemistry & Molecular Biology; Genetics & Heredity', 'Public, Environmental & Occupational Health; Transportation', 'Linguistics; Language & Linguistics', 'Plant Sciences; Ecology', 'Cardiac & Cardiovascular Systems; Radiology, Nuclear Medicine & Medical Imaging', 'Green & Sustainable Science & Technology; Energy & Fuels; Engineering, Environmental; Engineering, Chemical', 'Reproductive Biology; Toxicology', 'Sport Sciences', 'Acoustics; Engineering, Mechanical; Mechanics', 'Materials Science, Ceramics; Materials Science, Multidisciplinary', 'Engineering, Chemical; Water Resources', 'Chemistry, Inorganic & Nuclear', 'Computer Science, Hardware & Architecture; Computer Science, Information Systems; Engineering, Electrical & Electronic; Telecommunications', 'Critical Care Medicine; Emergency Medicine; Orthopedics; Surgery', 'Gerontology', 'Computer Science, Artificial Intelligence; Computer Science, Interdisciplinary Applications; Engineering, Biomedical; Radiology, Nuclear Medicine & Medical Imaging', 'Environmental Sciences; Environmental Studies; Geosciences, Multidisciplinary; Public, Environmental & Occupational Health; Meteorology & Atmospheric Sciences', 'Economics; Political Science', 'Fisheries; Immunology; Veterinary Sciences; Zoology', 'Audiology & Speech-Language Pathology; Neurosciences; Otorhinolaryngology', 'Chemistry, Analytical; Energy & Fuels; Engineering, Chemical', 'Development Studies; Economics', 'Geosciences, Multidisciplinary; Mining & Mineral Processing', 'Chemistry, Physical; Energy & Fuels; Materials Science, Multidisciplinary', 'Biochemistry & Molecular Biology; Endocrinology & Metabolism; Toxicology; Zoology', 'Psychology, Biological; Neurosciences; Physiology; Psychology; Psychology, Experimental', 'Biochemical Research Methods; Immunology', 'Oncology; Dentistry, Oral Surgery & Medicine', 'Engineering, Marine', 'Chemistry, Physical; Nanoscience & Nanotechnology; Materials Science, 
Multidisciplinary; Physics, Applied', 'Health Care Sciences & Services; Health Policy & Services', 'Oncology; Geriatrics & Gerontology', 'Medicine, Legal', 'Ecology; Environmental Studies; Geography; Geography, Physical; Regional & Urban Planning; Urban Studies', 'Biochemistry & Molecular Biology; Chemistry, Medicinal; Chemistry, Organic', 'Chemistry, Physical; Materials Science, Multidisciplinary; Mineralogy', 'Physiology; Respiratory System', 'Critical Care Medicine; Respiratory System', 'Psychology, Biological; Behavioral Sciences; Social Sciences, Biomedical', 'Genetics & Heredity; Medicine, Legal', 'Behavioral Sciences; Neurosciences; Pharmacology & Pharmacy', 'Astronomy & Astrophysics; Biology; Multidisciplinary Sciences', 'Oncology; Nursing', 'Law', 'Ecology; Marine & Freshwater Biology', 'Cell Biology; Immunology', 'Anesthesiology; Cardiac & Cardiovascular Systems; Respiratory System; Peripheral Vascular Disease', 'Orthopedics; Surgery', 'Construction & Building Technology; Materials Science, Multidisciplinary', 'Psychology, Applied', 'Thermodynamics; Energy & Fuels; Engineering, Chemical; Engineering, Mechanical', 'Materials Science, Multidisciplinary; Mechanics', 'History & Philosophy Of Science; Mathematics, Interdisciplinary Applications; Mathematics', 'Biodiversity Conservation; Biology', 'Biotechnology & Applied Microbiology; Food Science & Technology', 'Computer Science, Artificial Intelligence; Energy & Fuels', 'Education, Scientific Disciplines; Nursing', 'Business, Finance; Economics; Management; Operations Research & Management Science', 'Cardiac & Cardiovascular Systems; Respiratory System', 'Immunology; Infectious Diseases; Virology', 'Environmental Sciences; Soil Science; Water Resources', 'Genetics & Heredity; Toxicology', 'Remote Sensing', 'Energy & Fuels; Engineering, Chemical; Engineering, Petroleum', 'Transportation; Transportation Science & Technology', 'Neurosciences; Psychology; Psychology, Experimental; Sport Sciences', 'Computer 
Science, Information Systems; Computer Science, Interdisciplinary Applications', 'Biochemistry & Molecular Biology; Mathematical & Computational Biology', 'Infectious Diseases; Virology', 'Engineering, Multidisciplinary; Mechanics', 'Computer Science, Interdisciplinary Applications; Geosciences, Multidisciplinary', 'Health Care Sciences & Services; Health Policy & Services; Public, Environmental & Occupational Health', 'Engineering, Mechanical; Materials Science, Multidisciplinary; Mechanics', 'Energy & Fuels; Engineering, Petroleum; Engineering, Mechanical', 'Economics; Environmental Studies; Forestry', 'Endocrinology & Metabolism; Orthopedics', 'Endocrinology & Metabolism; Nutrition & Dietetics', 'Developmental Biology; Obstetrics & Gynecology; Reproductive Biology', 'Chemistry, Inorganic & Nuclear; Crystallography', 'Dentistry, Oral Surgery & Medicine; Surgery', 'Psychology, Developmental; Psychology, Experimental', 'Thermodynamics; Energy & Fuels; Engineering, Multidisciplinary; Engineering, Chemical; Engineering, Mechanical', 'Instruments & Instrumentation; Nuclear Science & Technology; Physics, Atomic, Molecular & Chemical; Physics, Nuclear','Other']

# No entry is pre-selected (index=None), so the selectbox returns None until
# the user picks a category.
selected_category = st.selectbox("Select WoS categories:", options, index=None)
if selected_category == "Other":
    # "Other" reveals a free-text field; keep the literal "Other" when that
    # field is left empty.
    custom_category = st.text_input("Enter custom category:")
    selected_category = custom_category if custom_category else "Other"

# Render an unselected category as an empty field; interpolating None
# directly would feed the literal text "None" to the model.
category_text = selected_category or ""

# Field order fed to the model: title, keywords, abstract, category, affiliations.
combined_text = f"{title_input} [SEP] {keywords_input} [SEP] {abstract_input} [SEP] {category_text} [SEP] {affiliations_input}"
if st.button("Predict"):
    text_fields = (title_input, abstract_input, keywords_input, affiliations_input)
    # Require at least one field with non-whitespace content.
    if not any(field.strip() for field in text_fields):
        st.warning("Please enter paper text.")
    else:
        with st.spinner("Predicting..."):
            predicted_class = predict_class(combined_text)
            # predict_class returns None after reporting its own error.
            if predicted_class is not None:
                class_labels = ["Level 1", "Level 2", "Level 3", "Level 4"]

                st.text("Predicted Class:")
                # List every level, highlighting the predicted one in its colour.
                for i, label in enumerate(class_labels):
                    if i == predicted_class:
                        st.markdown(
                            f'<div style="background-color: {class_colors[predicted_class]}; padding: 10px; border-radius: 5px; color: white; font-weight: bold;">{label}</div>',
                            unsafe_allow_html=True
                        )
                    else:
                        st.text(label)

#elif option == "PDF":
#    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

#    if uploaded_file is not None:
#        with st.spinner("Processing PDF..."):
#            file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
#            with open(file_path, "wb") as f:
#                f.write(uploaded_file.getbuffer())
#            st.success("File uploaded successfully.")
#            st.text(f"File Path: {file_path}")
#            
#            file_text = extract_text_from_pdf(file_path)
#            st.text("Extracted Text:")
#            st.text(file_text)
#
#            if st.button("Predict from PDF Text"):
#                if not file_text.strip():
#                    st.warning("Please upload a PDF with text content.")
#                else:
#                    with st.spinner("Predicting..."):
#                        predicted_class = predict_class(file_text)
#                        if predicted_class is not None:
#                            class_labels = ["Level 1", "Level 2", "Level 3", "Level 4"]
#                            st.text("**Predicted Class:**")
#                            for i, label in enumerate(class_labels):
#                                if i == predicted_class:
#                                    st.markdown(
#                                        f'<div style="background-color: {class_colors[predicted_class]}; padding: 10px; border-radius: 5px; color: white; font-weight: bold;">{label}</div>',
#                                        unsafe_allow_html=True
#                                    )
#                                else:
#                                    st.text(label)