I am trying to run a Python script using Selenium in my AWS Lambda, but it fails with the error below:
File "/var/lang/lib/python3.11/site-packages/webdriver_manager/core/driver.py", line 48, in get_driver_version_to_download
return self.get_latest_release_version()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/lang/lib/python3.11/site-packages/webdriver_manager/drivers/chrome.py", line 64, in get_latest_release_version
determined_browser_version = ".".join(determined_browser_version.split(".")[:3])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'split'
My AWS Lambda is running inside a container image with the following Dockerfile:
# Build from AWS Lambda ECR image with Python 3.11
FROM public.ecr.aws/lambda/python:3.11 as build
RUN yum install -y unzip && \
curl -Lo "/tmp/chromedriver-linux64.zip" "https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.111/linux64/chromedriver-linux64.zip" && \
curl -Lo "/tmp/chrome-linux64.zip" "https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.111/linux64/chrome-linux64.zip" && \
unzip /tmp/chromedriver-linux64.zip -d /opt/ && \
unzip /tmp/chrome-linux64.zip -d /opt/
FROM public.ecr.aws/lambda/python:3.11
RUN yum install -y atk cups-libs gtk3 libXcomposite alsa-lib \
    libXcursor libXdamage libXext libXi libXrandr libXScrnSaver \
    libXtst pango at-spi2-atk libXt xorg-x11-server-Xvfb \
    xorg-x11-xauth dbus-glib dbus-glib-devel nss mesa-libgbm
COPY --from=build /opt/chrome-linux64 /opt/chrome
COPY --from=build /opt/chromedriver-linux64 /opt/
# Copy the requirements file
COPY requirements.txt ${LAMBDA_TASK_ROOT}
# Install the requirements
RUN pip install -r requirements.txt
# Copy the files to the lambda tasks root
COPY . ${LAMBDA_TASK_ROOT}
# Run the handler
CMD ["main.main"]
This is my requirements.txt:
bs4==0.0.2
dynamodb-json==1.3
kink==0.7.0
pynamodb==6.0.0
selenium==4.18.1
webdriver-manager==4.0.1
And this is my Python script (main.py):
import logging
from kink import di
from datetime import datetime
from bs4 import BeautifulSoup
from models.core.di import main_injection
from models.core.web_driver import WebDriver
from models.db.opening import Opening
from utils.extractors import retrieve_tag_href, retrieve_tag_text
from models.db.opening_dao import ItemDao
# Initialize Logging
logging.getLogger().setLevel(logging.INFO)
@main_injection
def main(event, context):
    # Create a web driver instance
    web_driver = WebDriver(di["URL"], di["DELAY"])
    # Log event
    logging.info("Web driver has been initialized. Retrieving openings...")
    # Load the opening elements
    opening_elements = web_driver.load_elements(di["PRINCIPAL_FILTER"])
    # Extract the HTML of all opening elements, parse them with BS4 and save to JSON
    openings = []
    # Log event
    logging.info("Filtering the openings")
    for opening in opening_elements:
        # outer = position.get_attribute("outerHTML")
        soup = BeautifulSoup(opening.get_attribute("outerHTML"), "html.parser")
        opening_title = retrieve_tag_text(soup, di["FILTERS_NAME"])
        opening_posted_date = retrieve_tag_text(soup, di["FILTER_POSTED_DATE"])
        link = retrieve_tag_href(soup, di["FILTER_LINK"])
        openings.append(
            Opening(
                id=link,
                title=opening_title,
                posted_date=opening_posted_date,
                recruiter=di["RECRUITER"],
                updated_at=datetime.now().strftime("%Y-%m-%d"),
            )
        )
    if len(openings) > 0:
        # Log event
        logging.info(
            f"{len(openings)} openings obtained from recruiter {di['RECRUITER']}"
        )
        # Create a new ItemDao object
        item_dao = ItemDao()
        # Log event
        logging.info("Retrieving previous openings...")
        # Retrieve the previous openings
        previous_openings = item_dao.get_items_by_recruiter(recruiter=di["RECRUITER"])
        # Log event
        logging.info(
            f"{len(previous_openings)} previous openings obtained from recruiter {di['RECRUITER']}"
        )
        # Clear the previous openings from that recruiter
        item_dao.delete_all(openings=previous_openings)
        # Log event
        logging.info("Previous openings deleted")
        # Save the new openings
        item_dao.save_all(openings=openings)
        # Log event
        logging.info("New openings saved successfully")
    # Close the WebDriver
    web_driver.quit()
    # Log event
    logging.info("Script completed successfully.")
This is models/core/web_driver.py:
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
class WebDriver:
    def __init__(self, url, delay) -> None:
        # Set up Chrome WebDriver
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--headless=new")
        # Load chrome driver manager service
        self.chrome = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()), options=chrome_options
        )
        # Open the desired webpage
        self.chrome.get(url)
        # Wait for the "openings" tag to load
        self.wait = WebDriverWait(self.chrome, delay)

    def load_elements(self, main_filter):
        return self.wait.until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, main_filter))
        )

    def quit(self):
        # Quit the chrome driver
        self.chrome.quit()
This works fine locally on my laptop, but when I run it in a Docker container or in my AWS Lambda, I get the error above.
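For comparison, this is a minimal sketch of the same constructor wired directly to the binaries baked into the image (paths assumed from the Dockerfile's COPY steps, not verified), which would skip webdriver_manager's browser-version lookup entirely:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait

class WebDriver:
    def __init__(self, url, delay) -> None:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--headless=new")
        # Point Selenium at the Chrome binary copied to /opt/chrome (assumed path)
        chrome_options.binary_location = "/opt/chrome/chrome"
        # Use the chromedriver copied to /opt instead of resolving one at runtime (assumed path)
        self.chrome = webdriver.Chrome(
            service=Service("/opt/chromedriver"), options=chrome_options
        )
        # Open the desired webpage and set up the wait helper as before
        self.chrome.get(url)
        self.wait = WebDriverWait(self.chrome, delay)

Even so, the question remains why ChromeDriverManager().install() cannot detect the bundled Chrome inside the container when the same code works locally.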