Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -52,6 +52,21 @@ def check_poppler_installed():
|
|
52 |
|
53 |
check_poppler_installed()
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def clean_extracted_text(text):
|
56 |
"""
|
57 |
Cleans extracted text to remove metadata, headers, and irrelevant content.
|
|
|
52 |
|
53 |
check_poppler_installed()
|
54 |
|
55 |
+
|
56 |
+
def extract_patent_number(url):
    """
    Extracts the patent number from a patent URL.

    Searches ``url`` for a ``/patent/`` path segment followed by two
    uppercase letters and one or more digits, and returns that identifier.

    Args:
        url: URL text to search.

    Returns:
        The matched patent number (for example ``"US10000000"``), or None
        when ``url`` is not a non-empty string or contains no match.
    """
    # re.search raises TypeError on None / non-string input; treat such
    # input the same as "no patent number found".
    if not isinstance(url, str) or not url:
        return None

    # NOTE(review): the pattern stops at the first non-digit, so a trailing
    # kind code such as "B2" is not part of the returned value.
    pattern = r"/patent/([A-Z]{2}\d+)"
    match = re.search(pattern, url)
    return match.group(1) if match else None
|
60 |
+
|
61 |
+
def download_pdf(patent_number):
    """
    Downloads the PDF for a patent into the system temporary directory.

    Args:
        patent_number: Patent identifier forwarded to
            ``PatentDownloader.download`` as ``patents``.

    Returns:
        The first element of the value returned by
        ``PatentDownloader.download`` (presumably the path of the
        downloaded PDF -- confirm against the downloader's API).

    On any failure the error is shown with ``st.error`` and ``st.stop()``
    is called.
    """
    try:
        patent_downloader = PatentDownloader(verbose=True)
        output_path = patent_downloader.download(
            patents=patent_number, output_path=tempfile.gettempdir()
        )
        # An empty or missing result would otherwise surface as a cryptic
        # index/subscript error in the message shown to the user.
        if not output_path:
            raise RuntimeError(
                f"no file was returned for patent {patent_number!r}"
            )
        return output_path[0]
    except Exception as e:
        st.error(f"Failed to download patent PDF: {e}")
        st.stop()
|
69 |
+
|
70 |
def clean_extracted_text(text):
|
71 |
"""
|
72 |
Cleans extracted text to remove metadata, headers, and irrelevant content.
|