# Captured from a Hugging Face Spaces page (header read: "Spaces: Running").
# Build one local Parquet file from the PleIAs/French-PD-Newspapers shards
# hosted on Hugging Face, using the DuckDB CLI.
#
# Environment:
#   TMPDIR  directory for the scratch SQL file and the resulting
#           presse.parquet (defaults to /tmp when unset or empty).
# Output:
#   stdout is a terminal -> prints where the parquet file is and a sample
#                           query command; the file is kept.
#   stdout is redirected -> streams the parquet bytes to stdout and deletes
#                           the local copy afterwards.
# Exit status:
#   non-zero when duckdb is missing, the scratch file cannot be created,
#   the DuckDB run fails, or the parquet file cannot be streamed.
set -eu

tmp_dir=${TMPDIR:-/tmp}
tmp_dir=${tmp_dir%/}   # avoid a doubled slash when TMPDIR ends with "/"
base_url='https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main'
shard_count=320        # shards are named gallica_presse_1 .. gallica_presse_320
out_file="$tmp_dir/presse.parquet"

if ! command -v duckdb >/dev/null 2>&1; then
  printf 'error: duckdb CLI not found in PATH\n' >&2
  exit 127
fi

# Unpredictable scratch name; removed on every exit path.
sql_file=$(mktemp "$tmp_dir/presse.sql.XXXXXX") || {
  printf 'error: cannot create a scratch file in %s\n' "$tmp_dir" >&2
  exit 1
}
trap 'rm -f -- "$sql_file"' EXIT

# The output path is embedded in a single-quoted SQL literal, so any single
# quote in it has to be doubled.
out_file_sql=$(printf '%s' "$out_file" | sed "s/'/''/g")

{
  cat <<'SQL'
CREATE TABLE presse AS (
  SELECT file_id, ocr, title, date, author, page_count, word_count, character_count
  FROM read_parquet([
SQL
  i=1
  while [ "$i" -le "$shard_count" ]; do
    # No comma after the last element, so the list literal does not depend
    # on the parser tolerating a trailing comma.
    if [ "$i" -lt "$shard_count" ]; then
      sep=','
    else
      sep=''
    fi
    printf "    '%s/gallica_presse_%d.parquet'%s\n" "$base_url" "$i" "$sep"
    i=$((i + 1))
  done
  cat <<'SQL'
  ])
);
ALTER TABLE presse ALTER COLUMN date TYPE DATE;
SQL
  printf "COPY presse TO '%s' (FORMAT 'parquet', COMPRESSION 'GZIP');\n" "$out_file_sql"
} > "$sql_file"

if ! duckdb < "$sql_file"; then
  printf 'error: duckdb failed while building %s\n' "$out_file" >&2
  rm -f -- "$out_file"   # do not leave a partial parquet file behind
  exit 1
fi

# isatty: only dump binary parquet data when stdout is not a terminal.
if [ -t 1 ]; then
  printf 'parquet file output at: %s\n' "$out_file"
  printf "duckdb -csv :memory: \"SELECT * FROM '%s'\"\n" "$out_file"
else
  rc=0
  cat -- "$out_file" || rc=$?
  rm -f -- "$out_file"   # remove the local copy even if the reader went away
  [ "$rc" -eq 0 ] || exit "$rc"
fi