I am trying to scrape YouTube Trending video details (video title, video URL, channel name, and channel description) using the Selenium (Python) and requests libraries.
The youtube trending URL is: https://www.youtube.com/feed/trending
I have identified the main container of each video (the `ytd-video-renderer` element).
1. I have successfully scraped the video title and the video URL, but I was not able to scrape the channel name using the tag shown for it in the page markup.
The sample code is this:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
# Trending feed URL; the `bp` query parameter is kept exactly as captured.
trending_videos_url = 'https://www.youtube.com/feed/trending?bp=6gQJRkVleHBsb3Jl'
chrome_driver_path = 'C:/Users/Home/Desktop/chromedriver.exe'
# Create a ChromeService object with the executable path
chrome_service = Service(executable_path=chrome_driver_path)
# Pass the service to the Chrome constructor using the service argument
driver = webdriver.Chrome(service=chrome_service)
# The video list may not be in the DOM the instant the page load returns
# (presumably it is filled in by client-side scripts), so let element lookups
# poll for up to 10 seconds instead of giving up immediately.
# NOTE: this also means a lookup for a genuinely missing element takes up to
# 10 seconds before it fails.
driver.implicitly_wait(10)
driver.get(trending_videos_url)
print(driver.title)
VIDEO_DIV_TAG = 'ytd-video-renderer'
# One WebElement per video container on the page.
vedio_div = driver.find_elements(By.TAG_NAME, VIDEO_DIV_TAG)
## title , url , channel , views , uploaded , description
# find_elements returns an empty list when nothing matches; guard the index so
# an empty page does not crash with IndexError.
vedio1 = vedio_div[0] if vedio_div else None
def parse_vedio(vedio):
    """Extract the details of one video from its container element.

    Args:
        vedio: Selenium WebElement for a single ``ytd-video-renderer`` node.

    Returns:
        A dict with the keys ``title``, ``url``, ``thumbnail_img``,
        ``channel_name`` and ``channel_desc``.

    Raises:
        NoSuchElementException: If one of the expected child elements is not
            found inside ``vedio``.
    """
    title_tag = vedio.find_element(By.ID, 'video-title')
    title = title_tag.text
    url = title_tag.get_attribute('href')

    thumbnail_tag = vedio.find_element(By.TAG_NAME, 'img')
    thumbnail_url = thumbnail_tag.get_attribute('src')

    # By.TAG_NAME matches only an element's tag name (e.g. 'a'), so passing a
    # class list to it can never match.  To select by several classes, use a
    # CSS selector with every class prefixed by a dot and no spaces between
    # them; a space would mean "descendant of" instead.
    channel_link = vedio.find_element(
        By.CSS_SELECTOR,
        'a.yt-simple-endpoint.style-scope.yt-formatted-string',
    )
    # .text yields only rendered text.  NOTE(review): if the first matching
    # link happens to be a non-displayed copy, .text is empty, so fall back to
    # the raw textContent -- confirm against the live page.
    channel_name = channel_link.text
    if not channel_name:
        channel_name = (channel_link.get_attribute('textContent') or '').strip()

    channel_desc_tag = vedio.find_element(By.ID, 'description-text')
    channel_desc = channel_desc_tag.text

    return {
        'title': title,
        'url': url,
        'thumbnail_img': thumbnail_url,
        'channel_name': channel_name,
        'channel_desc': channel_desc,
    }
if __name__ == '__main__':
    try:
        print('parsing top 10 vedios')
        # Parse at most the first ten video containers found on the page.
        videos_data = [parse_vedio(vedio) for vedio in vedio_div[:10]]
        print(videos_data)
    finally:
        # Shut the browser session down even if parsing raised, so the browser
        # window and driver process are not left running.
        driver.quit()
When I try to target the `<a>` tag with the class name `yt-simple-endpoint style-scope yt-formatted-string`, I get the following error: `NoSuchElementException`.
When I tried the custom tag `<yt-formatted-string>` instead, I was able to scrape the channel name successfully.