DrishtiSharma committed on
Commit
52578a6
·
verified ·
1 Parent(s): 06e96cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -52,6 +52,21 @@ def check_poppler_installed():
52
 
53
  check_poppler_installed()
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def clean_extracted_text(text):
56
  """
57
  Cleans extracted text to remove metadata, headers, and irrelevant content.
 
52
 
53
  check_poppler_installed()
54
 
55
+
56
def extract_patent_number(url):
    """
    Extracts a patent number from a URL containing a ``/patent/<number>`` segment.

    The number is recognised as two uppercase ASCII letters followed by one or
    more digits directly after ``/patent/`` (for example ``US1234567`` in
    ``.../patent/US1234567B2/en``). Matching stops at the first non-digit, so
    any trailing suffix such as ``B2`` is not part of the result.

    Args:
        url: The URL text to search. Values that are not ``str`` (including
            ``None``) are treated as "no patent number found".

    Returns:
        The matched patent number, or ``None`` when ``url`` is not a string or
        contains no matching segment.
    """
    # re.search raises TypeError on None / non-string input; report "not found"
    # instead so callers only have to handle the None result.
    if not isinstance(url, str):
        return None

    pattern = r"/patent/([A-Z]{2}\d+)"
    match = re.search(pattern, url)
    return match.group(1) if match else None
60
+
61
def download_pdf(patent_number):
    """
    Downloads the PDF for the given patent into the system temporary directory.

    A ``PatentDownloader`` is created with ``verbose=True`` and asked to
    download ``patent_number`` into ``tempfile.gettempdir()``. The first
    element of the value it returns is handed back to the caller
    (presumably the path of the saved file -- confirm against PatentDownloader).

    If anything in that sequence raises, the failure is shown with
    ``st.error`` and ``st.stop()`` is called.
    """
    try:
        downloader = PatentDownloader(verbose=True)
        target_dir = tempfile.gettempdir()
        results = downloader.download(
            patents=patent_number,
            output_path=target_dir,
        )
        # Indexing stays inside the try so an unexpected return value is
        # reported through the same error path as a failed download.
        return results[0]
    except Exception as exc:
        st.error(f"Failed to download patent PDF: {exc}")
        st.stop()
69
+
70
  def clean_extracted_text(text):
71
  """
72
  Cleans extracted text to remove metadata, headers, and irrelevant content.