#!/usr/bin/env python3

"""Get the YouTube channel ID from its name using web scraping."""

from argparse import ArgumentParser
from logging import WARNING
from typing import Iterator

from scrapy import Spider, Request
from scrapy.crawler import CrawlerProcess
from scrapy.http import Response
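
# NOTE: this script depends on Scrapy (available on PyPI as `scrapy`).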


class YoutubeChannelIdSpider(Spider):
    """Spider in charge of YouTube channel ID scraping."""

    name = "youtube_channel"
    allowed_domains = ['youtube.com']

    def __init__(self, channel, *args, **kwargs):
        """Init YoutubeChannelIdSpider.

        Args:
            channel: The name of the channel for which the ID will be retrieved.
            *args: Non-keyword arguments passed on to the Spider init method.
            **kwargs: Keyword arguments passed on to the Spider init method.
        """
        super().__init__(*args, **kwargs)
        self.channel = channel
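        # NOTE: assumes the channel is reachable via YouTube's /c/<name>
        # custom-URL form; newer channels may only be reachable via an
        # /@handle URL.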
        self.url = f'https://www.youtube.com/c/{channel}'

    def start_requests(self) -> Iterator[Request]:
        """Launch the HTTP request that fetches the YouTube channel's main page."""
        yield Request(url=self.url,
                      callback=self.parse,
                      cookies={'CONSENT': 'YES+1'})

    def parse(self, response: Response, **kwargs) -> None:
        """Parse the YouTube channel main page and extract its ID from it.

        Args:
            response: The scrapy.http.Response to parse.
            **kwargs: Unused extra arguments.
        """
        if channel_id := response.xpath(
                '//meta[@itemprop="channelId"]/@content').get():
            print(channel_id)
        else:
            print(f'Unable to find ID for channel {self.channel}')


if __name__ == '__main__':

    parser = ArgumentParser()
    parser.add_argument(
        'channel',
        help='The YouTube channel for which the ID shall be retrieved')
    parser_args = parser.parse_args()

    process = CrawlerProcess({'LOG_LEVEL': WARNING})
    process.crawl(YoutubeChannelIdSpider, channel=parser_args.channel)
    # The script will block here until the crawling is finished
    process.start()
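
# Example invocation (hypothetical channel name; assumes the script is saved
# as youtube_channel_id.py and marked executable):
#
#   ./youtube_channel_id.py SomeChannelName
#
# On success the channel ID is printed to stdout; otherwise an error message
# naming the channel is printed instead.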