barunsaha committed on
Commit
bb25da3
·
1 Parent(s): 7f63688

Set user-agent for Pexels API call

Browse files
Files changed (1) hide show
  1. helpers/image_search.py +30 -5
helpers/image_search.py CHANGED
@@ -9,16 +9,20 @@ from typing import Union, Tuple, Literal
9
  from urllib.parse import urlparse, parse_qs
10
 
11
  import requests
 
 
 
 
12
 
13
 
14
  REQUEST_TIMEOUT = 12
15
  MAX_PHOTOS = 3
16
 
17
 
18
- # Only show warnings
19
- logging.getLogger('urllib3').setLevel(logging.WARNING)
20
  # Disable all child loggers of urllib3, e.g. urllib3.connectionpool
21
- logging.getLogger('urllib3').propagate = False
22
 
23
 
24
 
@@ -33,6 +37,14 @@ def search_pexels(
33
  This function sends a GET request to the Pexels API with the specified search query
34
  and authorization header containing the API key. It returns the JSON response from the API.
35
 
 
 
 
 
 
 
 
 
36
  :param query: The search query for finding images.
37
  :param size: The size of the images: small, medium, or large.
38
  :param per_page: No. of results to be displayed per page.
@@ -42,7 +54,8 @@ def search_pexels(
42
 
43
  url = 'https://api.pexels.com/v1/search'
44
  headers = {
45
- 'Authorization': os.getenv('PEXEL_API_KEY')
 
46
  }
47
  params = {
48
  'query': query,
@@ -101,7 +114,11 @@ def get_image_from_url(url: str) -> BytesIO:
101
  :raises requests.exceptions.RequestException: If the request to the URL fails.
102
  """
103
 
104
- response = requests.get(url, stream=True, timeout=REQUEST_TIMEOUT)
 
 
 
 
105
  response.raise_for_status()
106
  image_data = BytesIO(response.content)
107
 
@@ -121,3 +138,11 @@ def extract_dimensions(url: str) -> Tuple[int, int]:
121
  height = int(query_params.get('h', [0])[0])
122
 
123
  return width, height
 
 
 
 
 
 
 
 
 
9
  from urllib.parse import urlparse, parse_qs
10
 
11
  import requests
12
+ from dotenv import load_dotenv
13
+
14
+
15
+ load_dotenv()
16
 
17
 
18
  REQUEST_TIMEOUT = 12
19
  MAX_PHOTOS = 3
20
 
21
 
22
+ # Only show errors
23
+ logging.getLogger('urllib3').setLevel(logging.ERROR)
24
  # Disable all child loggers of urllib3, e.g. urllib3.connectionpool
25
+ # logging.getLogger('urllib3').propagate = True
26
 
27
 
28
 
 
37
  This function sends a GET request to the Pexels API with the specified search query
38
  and authorization header containing the API key. It returns the JSON response from the API.
39
 
40
+ [2024-08-31] Note:
41
+ `curl` succeeds but the API call via Python `requests` fails. Apparently, this could be due to
42
+ Cloudflare (or others) blocking the requests, perhaps identifying them as web scraping. So,
43
+ changing the user-agent to Firefox.
44
+ https://stackoverflow.com/a/74674276/147021
45
+ https://stackoverflow.com/a/51268523/147021
46
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent/Firefox#linux
47
+
48
  :param query: The search query for finding images.
49
  :param size: The size of the images: small, medium, or large.
50
  :param per_page: No. of results to be displayed per page.
 
54
 
55
  url = 'https://api.pexels.com/v1/search'
56
  headers = {
57
+ 'Authorization': os.getenv('PEXEL_API_KEY'),
58
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
59
  }
60
  params = {
61
  'query': query,
 
114
  :raises requests.exceptions.RequestException: If the request to the URL fails.
115
  """
116
 
117
+ headers = {
118
+ 'Authorization': os.getenv('PEXEL_API_KEY'),
119
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
120
+ }
121
+ response = requests.get(url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT)
122
  response.raise_for_status()
123
  image_data = BytesIO(response.content)
124
 
 
138
  height = int(query_params.get('h', [0])[0])
139
 
140
  return width, height
141
+
142
+
143
+ if __name__ == '__main__':
144
+ print(
145
+ search_pexels(
146
+ query='people'
147
+ )
148
+ )