[tool.poetry]
# Distribution identity.
name = "llmdataparser"
version = "0.1.0"
description = "A collection of parsers for LLM benchmark datasets like MMLU, MMLU-Pro, GSM8k, and more."
license = "MIT"
authors = ["Jeff Yang <jeff52415@gmail.com>"]
readme = "README.md"

# Project links; homepage and repository both point at the same GitHub URL.
homepage = "https://github.com/jeff52415/LLMDataParser"
repository = "https://github.com/jeff52415/LLMDataParser"

# Search and index metadata.
keywords = [
    "LLM",
    "benchmark",
    "dataset",
    "parser",
    "NLP",
    "machine learning",
]
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Intended Audience :: Developers",
]

[tool.poetry.dependencies]
# Interpreter requirement: Python 3.11 or newer, with no upper bound.
python = ">=3.11"

# Runtime packages, sorted alphabetically. All use Poetry caret constraints.
datasets = "^2.14.4"
pandas = "^2.0.3"
typing-extensions = "^4.8.0"

[tool.poetry.group.dev.dependencies]
# Formatting and linting.
black = { version = "^23.9.1", allow-prereleases = true }
isort = "^5.12.0"
flake8 = "^6.1.0"

# Static typing and type stubs.
mypy = "^1.5.1"
types-python-dateutil = "^2.8.19.14"

# Testing.
pytest = "^7.4.0"

# Developer workflow: git hooks and Jupyter kernel support.
pre-commit = "^3.4.0"
ipykernel = "^6.7.0"

[tool.black]
target-version = ["py311"]
line-length = 88
# Regex of paths Black should skip. Written as a multi-line literal string
# so the backslashes need no TOML-level escaping.
exclude = '''
/(
    \.git
  | \.venv
  | build
  | dist
)/
'''

[tool.isort]
# Treat the project's own package as first-party when grouping imports.
known_first_party = ["llmdataparser"]
# Use isort's Black-compatible profile; the line length is spelled out to
# match the 88 configured under [tool.black].
profile = "black"
line_length = 88

[tool.flake8]
# NOTE: flake8 does not read pyproject.toml on its own. This table only takes
# effect through a bridging plugin such as Flake8-pyproject; without one,
# mirror these settings in .flake8, setup.cfg, or tox.ini.
max-line-length = 88
# Use extend-ignore rather than ignore: `ignore` replaces flake8's default
# ignore list (which contains W503/W504) and would re-enable checks that
# conflict with Black's output.
extend-ignore = [
    "E203",  # whitespace before ':' (conflicts with Black's slice formatting)
    "E501",  # line too long (line length is left to Black)
]

[tool.ruff]
line-length = 88
# Enable the pycodestyle error ("E") and Pyflakes ("F") rule families.
select = [
    "E",
    "F",
]
# E501 (line too long) is part of "E", so it is switched off explicitly.
ignore = [
    "E501",
]

[build-system]
# PEP 517 backend provided by poetry-core.
build-backend = "poetry.core.masonry.api"
# Packages a build frontend must install before invoking the backend.
requires = ["poetry-core>=1.5.0"]