Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
•
4a5d03c
1
Parent(s):
9ed5b2c
Remove NSFW datasets (filter out datasets tagged `not-for-all-audiences`)
Browse files
- load_data.py +1 -0
load_data.py
CHANGED
@@ -141,6 +141,7 @@ def load_cards(
|
|
141 |
f"Loading cards with min_len={min_len}, min_likes={min_likes}, last_modified={last_modified}"
|
142 |
)
|
143 |
df = pl.read_parquet(DATASET_PARQUET_URL)
|
|
|
144 |
df = parse_markdown_column(df, "card", "datasetId")
|
145 |
df = df.with_columns(pl.col("parsed_markdown").str.len_chars().alias("card_len"))
|
146 |
df = df.filter(pl.col("card_len") > min_len)
|
|
|
141 |
f"Loading cards with min_len={min_len}, min_likes={min_likes}, last_modified={last_modified}"
|
142 |
)
|
143 |
df = pl.read_parquet(DATASET_PARQUET_URL)
|
144 |
+
df = df.filter(~pl.col("tags").list.contains("not-for-all-audiences"))
|
145 |
df = parse_markdown_column(df, "card", "datasetId")
|
146 |
df = df.with_columns(pl.col("parsed_markdown").str.len_chars().alias("card_len"))
|
147 |
df = df.filter(pl.col("card_len") > min_len)
|