Update README.md
Browse files
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
|
2 |
|
3 |
-
## Usage
|
4 |
|
5 |
```python
|
6 |
import torch
|
@@ -39,7 +39,7 @@ ocr_df = ocr_df.dropna() \\n .assign(left_scaled = ocr_df.left*w_s
|
|
39 |
|
40 |
float_cols = ocr_df.select_dtypes('float').columns
|
41 |
ocr_df[float_cols] = ocr_df[float_cols].round(0).astype(int)
|
42 |
-
ocr_df = ocr_df.replace(r'^\s
|
43 |
ocr_df = ocr_df.dropna().reset_index(drop=True)
|
44 |
ocr_df[:20]
|
45 |
|
|
|
1 |
+
# LayoutLM fine-tuned on FUNSD for Document token classification
|
2 |
|
3 |
+
## Usage (WIP)
|
4 |
|
5 |
```python
|
6 |
import torch
|
|
|
39 |
|
40 |
float_cols = ocr_df.select_dtypes('float').columns
|
41 |
ocr_df[float_cols] = ocr_df[float_cols].round(0).astype(int)
|
42 |
+
ocr_df = ocr_df.replace(r'^\s*$', np.nan, regex=True)
|
43 |
ocr_df = ocr_df.dropna().reset_index(drop=True)
|
44 |
ocr_df[:20]
|
45 |
|