AttributeError: 'NoneType' object has no attribute 'split' with Python Selenium in AWS Lambda

I am trying to run a Python script using Selenium in my AWS Lambda, but it returns the error below:

  File "/var/lang/lib/python3.11/site-packages/webdriver_manager/core/driver.py", line 48, in get_driver_version_to_download
    return self.get_latest_release_version()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/lang/lib/python3.11/site-packages/webdriver_manager/drivers/chrome.py", line 64, in get_latest_release_version
    determined_browser_version = ".".join(determined_browser_version.split(".")[:3])
                                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'split'
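
As far as I can tell from the traceback, webdriver-manager could not determine which Chrome version is installed (determined_browser_version is None), which is why the .split(".") call fails. A quick diagnostic I could add to the handler to confirm whether Chrome is even discoverable inside the container (my own sketch, not part of the failing code; the /opt/chrome path comes from the Dockerfile below):

import shutil
import subprocess

# Is any Chrome binary discoverable on PATH? (webdriver-manager presumably relies on this)
print("google-chrome on PATH:", shutil.which("google-chrome"))

# The Chrome binary baked into the image by the Dockerfile below
result = subprocess.run(
    ["/opt/chrome/chrome", "--version"], capture_output=True, text=True
)
print("bundled chrome:", result.stdout.strip())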

My AWS Lambda is running inside a container image with the following Dockerfile:

# Build from AWS Lambda ECR image with Python 3.11
FROM public.ecr.aws/lambda/python:3.11 as build

RUN yum install -y unzip && \
    curl -Lo "/tmp/chromedriver-linux64.zip" "https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.111/linux64/chromedriver-linux64.zip" && \
    curl -Lo "/tmp/chrome-linux64.zip" "https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.111/linux64/chrome-linux64.zip" && \
    unzip /tmp/chromedriver-linux64.zip -d /opt/ && \
    unzip /tmp/chrome-linux64.zip -d /opt/

FROM public.ecr.aws/lambda/python:3.11

RUN yum install -y atk cups-libs gtk3 libXcomposite alsa-lib \
    libXcursor libXdamage libXext libXi libXrandr libXScrnSaver \
    libXtst pango at-spi2-atk libXt xorg-x11-server-Xvfb \
    xorg-x11-xauth dbus-glib dbus-glib-devel nss mesa-libgbm

COPY --from=build /opt/chrome-linux64 /opt/chrome
COPY --from=build /opt/chromedriver-linux64 /opt/

# Copy the requirements file
COPY requirements.txt ${LAMBDA_TASK_ROOT}

# Install the requirements
RUN pip install -r requirements.txt

# Copy the files to the lambda tasks root
COPY . ${LAMBDA_TASK_ROOT}

# Run the handler
CMD ["main.main"]

This is my requirements.txt:

bs4==0.0.2
dynamodb-json==1.3
kink==0.7.0
pynamodb==6.0.0
selenium==4.18.1
webdriver-manager==4.0.1

And this is my Python script:

import logging
from kink import di
from datetime import datetime
from bs4 import BeautifulSoup
from models.core.di import main_injection
from models.core.web_driver import WebDriver
from models.db.opening import Opening
from utils.extractors import retrieve_tag_href, retrieve_tag_text
from models.db.opening_dao import ItemDao

# Initialize Logging
logging.getLogger().setLevel(logging.INFO)


@main_injection
def main(event, context):
    # Create a web driver instance
    web_driver = WebDriver(di["URL"], di["DELAY"])

    # Log event
    logging.info("Web driver has been intialized. Retrieving openings...")

    # Load the opening elements
    opening_elements = web_driver.load_elements(di["PRINCIPAL_FILTER"])

    # Extract the HTML of all openings elements, parse them with BS4 and save to JSON
    openings = []

    # Log event
    logging.info("Filtering the openings")

    for opening in opening_elements:
        # outer = position.get_attribute("outerHTML")
        soup = BeautifulSoup(opening.get_attribute("outerHTML"), "html.parser")
        opening_title = retrieve_tag_text(soup, di["FILTERS_NAME"])
        opening_posted_date = retrieve_tag_text(soup, di["FILTER_POSTED_DATE"])
        link = retrieve_tag_href(soup, di["FILTER_LINK"])

        openings.append(
            Opening(
                id=link,
                title=opening_title,
                posted_date=opening_posted_date,
                recruiter=di["RECRUITER"],
                updated_at=datetime.now().strftime("%Y-%m-%d"),
            )
        )

    if len(openings) > 0:
        # Log event
        logging.info(
            f"{len(openings)} openings obtained from recruiter {di['RECRUITER']}"
        )

        # Create a new ItemDao object
        item_dao = ItemDao()

        # Log event
        logging.info("Retrieving previous opening...")

        # Retrieve the previous openings
        previous_openings = item_dao.get_items_by_recruiter(recruiter=di["RECRUITER"])

        # Log event
        logging.info(
            f"{len(previous_openings)} previous openings obtained from recruiter {di['RECRUITER']}"
        )

        # Clear the previous openings from that recruiter
        item_dao.delete_all(openings=previous_openings)

        # Log event
        logging.info("Previous openings deleted")

        # Save the new openings
        item_dao.save_all(openings=openings)

        # Log event
        logging.info("New openings saved successfully")

    # Close the WebDriver
    web_driver.quit()

    # Log event
    logging.info("Script completed successfully.")

models/core/web_driver.py

from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class WebDriver:
    def __init__(self, url, delay) -> None:
        # Set up Chrome WebDriver
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--headless=new")

        # Load chrome driver manager service
        self.chrome = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()), options=chrome_options
        )

        # Open the desired webpage
        self.chrome.get(url)

        # Wait for the "openings" tag to load
        self.wait = WebDriverWait(self.chrome, delay)

    def load_elements(self, main_filter):
        return self.wait.until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, main_filter))
        )

    def quit(self):
        # Quit the chrome driver
        self.chrome.quit()

This works fine locally on my laptop, but if I run it in a Docker container or in my AWS Lambda, I get the above error.
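
My best guess so far: locally Chrome is installed system-wide, so webdriver-manager can detect its version, but inside the Lambda image Chrome lives under /opt/chrome and is not on PATH, so the detected version comes back as None. Since the image already ships a matching chromedriver, I am considering bypassing webdriver-manager entirely and pointing Selenium at the bundled binaries. A sketch of what I have in mind (paths taken from my Dockerfile above, not verified yet):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service


def build_driver():
    # Use the Chrome and chromedriver binaries baked into the image
    # instead of letting webdriver-manager resolve/download a driver.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless=new")
    # Flags commonly suggested for Chrome in containerized/Lambda environments
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.binary_location = "/opt/chrome/chrome"  # from the Dockerfile COPY

    service = Service(executable_path="/opt/chromedriver")  # from the Dockerfile COPY
    return webdriver.Chrome(service=service, options=chrome_options)

Is this the right way to handle it, or is there a way to make webdriver-manager detect the bundled Chrome inside the Lambda container?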
