import pytest | |
import pandas as pd | |
import unittest | |
from result_data_processor import ResultDataProcessor | |
import os | |
class TestRegression(unittest.TestCase):
    """Regression test comparing the processor output with a stored snapshot."""

    def test_data_output_is_the_same(self):
        """Check that ``ResultDataProcessor().data`` equals the reference frame.

        The reference frame is read from
        ``dataframe_history/output_<hash>.parquet``, where ``<hash>`` is the
        output of ``git rev-parse HEAD``. The test fails with an explanatory
        message when the hash cannot be determined or the snapshot file is
        missing, instead of erroring inside the parquet reader.
        """
        df_current = ResultDataProcessor().data

        # Resolve the hash of the checked-out commit. The context manager
        # closes the pipe so the file handle does not leak.
        with os.popen('git rev-parse HEAD') as git_pipe:
            last_commit = git_pipe.read().strip()
        print(last_commit)

        # An empty hash would otherwise silently produce the path
        # 'dataframe_history/output_.parquet'.
        if not last_commit:
            self.fail("'git rev-parse HEAD' returned no commit hash")

        # Load the reference dataframe.
        reference_file = f'dataframe_history/output_{last_commit}.parquet'
        if not os.path.isfile(reference_file):
            self.fail(f'reference dataframe not found: {reference_file}')
        df_reference = pd.read_parquet(reference_file)

        # TODO:
        # - If there are no untracked changes, the dataframes should be the
        #   same.
        # - If there is no file saved for the current commit, save a file for
        #   the current commit and instead compare the last commit to the one
        #   previous to it.
        # - If there are untracked changes, the dataframes should be
        #   different.
        # - Either optionally take a parameter for this test or extract the
        #   comparison logic so that it can be used separately to compare any
        #   two given commit hashes.

        # Compare DataFrames, allowing for some tolerance in floating-point
        # comparisons.
        pd.testing.assert_frame_equal(
            df_current, df_reference, check_dtype=True, atol=1e-5
        )
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()