docs2datasets / demo.py
davidberenstein1957's picture
feat: add docling support
08fabf7
raw
history blame contribute delete
285 Bytes
from docling.document_converter import DocumentConverter
# Demo: run a single document through Docling's DocumentConverter and print
# the converted document rendered as Markdown.
source = "https://arxiv.org/pdf/2408.09869"  # PDF path or URL

converter = DocumentConverter()
result = converter.convert(source)

# Render to Markdown first, then print, so the two steps read separately.
markdown_text = result.document.export_to_markdown()
print(markdown_text)  # output: "### Docling Technical Report[...]"