Canstralian committed on
Commit
653b423
·
verified ·
1 Parent(s): db475c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -75
app.py CHANGED
@@ -1,77 +1,88 @@
1
- import gradio as gr
2
- from transformers import AutoTokenizer, AutoModel
3
- import random
4
-
5
class TokenizerModelInitializer:
    """Load a tokenizer and a model for one model name via ``transformers``."""

    def __init__(self, model_name: str):
        """
        Initializes the TokenizerModelInitializer with a specified model name.

        Parameters:
        - model_name (str): Name handed to ``AutoTokenizer.from_pretrained``
          and ``AutoModel.from_pretrained``.

        Raises:
        - ValueError: If ``model_name`` is empty.
        - Exception: If loading the tokenizer or the model fails. The
          underlying error is attached as ``__cause__``.
        """
        if not model_name:
            raise ValueError("Model name must not be empty.")

        self.model_name = model_name
        try:
            # Load tokenizer and model
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModel.from_pretrained(self.model_name)
        except Exception as e:
            # Chain explicitly so the root failure is reported as the direct
            # cause rather than as an incidental "during handling" context.
            raise Exception(f"Failed to load the model or tokenizer: {e}") from e

    def tokenize_text(self, text: str):
        """
        Tokenizes the input text using the initialized tokenizer.

        Parameters:
        - text (str): Text to split into tokens.

        Returns:
        Whatever ``self.tokenizer.tokenize(text)`` returns.

        Raises:
        - ValueError: If ``text`` is empty.
        """
        if not text:
            raise ValueError("Input text must not be empty.")

        return self.tokenizer.tokenize(text)
30
-
31
# Build the shared tokenizer/model wrapper once, at module load, so every
# request handled by the UI reuses the same instance.
model_name = "bert-base-uncased"
initializer = TokenizerModelInitializer(model_name=model_name)
34
-
35
- # Gradio Function to Display Tokenized Text
36
def gradio_tokenize(input_text):
    """
    Tokenize ``input_text`` with the module-level ``initializer`` for display.

    Returns a string of the form ``"Tokens: [...]"`` on success. Any exception
    raised while tokenizing is caught and reported as ``"Error: <message>"``
    instead of propagating to the UI.
    """
    try:
        return f"Tokens: {initializer.tokenize_text(input_text)}"
    except Exception as failure:
        return f"Error: {failure}"
42
-
43
- # Gradio Chatbot Function (simulated)
44
def chatbot_response(user_input):
    """
    Return one of four canned replies, picked at random.

    ``user_input`` is accepted to fit the Gradio callback signature but is
    not inspected.
    """
    canned_replies = (
        "That's an interesting question! Let me think about it.",
        "I'm still learning, but I will improve soon!",
        "Thanks for your input! I'm always evolving.",
        "Great question! I'll keep working on my answers.",
    )
    return random.choice(canned_replies)
52
-
53
# Tab 1: free-text input routed through gradio_tokenize
tokenization_interface = gr.Interface(
    title="Tokenization Interface",
    description="Enter a text and I'll tokenize it using the specified model's tokenizer.",
    fn=gradio_tokenize,
    inputs="text",
    outputs="text",
    theme="compact",
)

# Tab 2: placeholder chatbot that answers with canned replies
chatbot_interface = gr.Interface(
    title="Competing Chatbot",
    description="Challenge me with your questions while the model is fine-tuning!",
    fn=chatbot_response,
    inputs=gr.Textbox(label="Ask me anything!"),
    outputs="text",
    theme="compact",
)

# Present both interfaces side by side as tabs
demo = gr.TabbedInterface(
    [tokenization_interface, chatbot_interface],
    tab_names=["Tokenization", "Chatbot"],
)

# Start serving the app
demo.launch()
 
1
+ import pandas as pd
2
+ import os
3
+
4
def load_dataset(file_path: str) -> pd.DataFrame:
    """
    Loads a dataset from a specified file path into a Pandas DataFrame.

    The reader is chosen from the file extension (case-insensitive):
    ``.csv`` uses ``pd.read_csv``, ``.xlsx``/``.xls`` use ``pd.read_excel``
    and ``.json`` uses ``pd.read_json``.

    Parameters:
    - file_path (str): The path to the dataset file on the filesystem.

    Returns:
    pd.DataFrame: A non-empty DataFrame containing the loaded dataset.

    Raises:
    - TypeError: If the file path is not a string.
    - FileNotFoundError: If the specified file path does not exist.
    - ValueError: If the file extension is not supported, or if the reader
      rejects the file contents with a ValueError.
    - pd.errors.EmptyDataError: If the file has no data or the resulting
      DataFrame is empty.
    - pd.errors.ParserError: If there is an error while parsing the file.

    Any other exception raised by the reader (for example a missing Excel
    engine or a permission problem) propagates unchanged.

    Examples:
    >>> df = load_dataset('data/my_dataset.csv')  # doctest: +SKIP
    >>> print(df.head())  # doctest: +SKIP
    """
    # Checking if file path is a string
    if not isinstance(file_path, str):
        raise TypeError(f"Expected file path to be a string, but got {type(file_path).__name__}.")

    # Checking if the file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}. Please check the path and try again.")

    # splitext only inspects the final path component, so dots in directory
    # names are not mistaken for an extension.
    file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()
    readers = {
        'csv': pd.read_csv,
        'xlsx': pd.read_excel,
        'xls': pd.read_excel,
        'json': pd.read_json,
    }
    reader = readers.get(file_extension)
    if reader is None:
        # Raised outside the try below so the specific message reaches the caller.
        raise ValueError(
            f"Unsupported file format: {file_extension or '(none)'}. "
            "Supported formats are CSV, Excel, and JSON."
        )

    try:
        dataset = reader(file_path)
    # EmptyDataError and ParserError both subclass ValueError, so they must
    # be handled before the generic ValueError clause to stay reachable.
    except pd.errors.EmptyDataError as empty_data_error:
        raise pd.errors.EmptyDataError(f"The file at {file_path} is empty and cannot be loaded into a DataFrame.") from empty_data_error
    except pd.errors.ParserError as parser_error:
        raise pd.errors.ParserError(f"Error parsing the file at {file_path}. Please check the file format and contents.") from parser_error
    except ValueError as value_error:
        raise ValueError(f"Error loading the dataset from {file_path}. Please ensure the file is in a supported format and not empty.") from value_error

    # A file that parses but yields no rows (e.g. header only) is treated as empty
    if dataset.empty:
        raise pd.errors.EmptyDataError(f"The file at {file_path} is empty and cannot be loaded into a DataFrame.")

    return dataset
70
+
71
+
72
# Demonstration of load_dataset. The first failure aborts the remaining
# examples and is reported on stdout.
try:
    # Examples 1 and 2: a CSV file, then an Excel file
    for example_path in ('data/my_dataset.csv', 'data/my_dataset.xlsx'):
        dataset = load_dataset(example_path)
        print("Dataset loaded successfully!")
        print(dataset.head())  # Displaying the first few rows of the dataset

    # Example 3: a path that does not exist (should raise an error)
    dataset = load_dataset('data/non_existent_file.csv')

except Exception as load_error:
    print(f"An error occurred: {load_error}")