Files
org-roamings/20220530080115-youtube.org
2022-06-04 12:57:39 +02:00

1.7 KiB

Youtube

Introduction

Est-ce nécessaire ?

Obtenir l'id d'un channel afin de s'abonner à son flux RSS

# pip install scrapy

from scrapy import Spider, Request
from scrapy.crawler import CrawlerProcess
from scrapy.utils.log import configure_logging


class YoutubeChannelIdSpider(Spider):
    """Spider that fetches one YouTube channel page and prints its channel ID.

    The ID is taken from the ``content`` attribute of the page's
    ``<meta itemprop="channelId">`` element.
    """

    name = "youtube_channel"
    allowed_domains = ['youtube.com']

    def __init__(self, channel, *args, **kwargs):
        """Store the channel name and build the URL of its page.

        Args:
            channel: Channel name, inserted after ``/c/`` in the URL.
            *args: Forwarded unchanged to the parent initializer.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(*args, **kwargs)
        self.url = f'https://www.youtube.com/c/{channel}'
        self.channel = channel

    def start_requests(self):
        """Yield the single request for the channel page."""
        # NOTE(review): presumably this cookie skips the consent interstitial
        # so the real channel page is returned -- confirm.
        consent_cookie = {'CONSENT': 'YES+1'}
        yield Request(url=self.url, callback=self.parse, cookies=consent_cookie)

    def parse(self, response):
        """Print the channel ID found in ``response``, or a failure notice."""
        found_id = response.xpath('//meta[@itemprop="channelId"]/@content').get()
        if not found_id:
            # Missing (or empty) meta value: report the failure and stop.
            print(f'Unable to find ID for channel {self.channel}')
            return
        print(f'{self.channel} channel ID is : "{found_id}"')

# Ask which channel to look up. Leading/trailing whitespace is stripped so an
# accidental space around the name does not end up in the URL path.
channel = input("What's the Youtube channel for which the ID shall be retrieved ?").strip()

# Run the spider in-process for the requested channel.
process = CrawlerProcess()
process.crawl(YoutubeChannelIdSpider, channel=channel)
process.start() # the script will block here until the crawling is finished
AAAAAAAAAAAAAAAAAAAAAAAAA
# Sample channel page URL in the /c/<name> form (its use is not shown here).
url = 'https://www.youtube.com/c/Matrixdotorg/'