53 lines
1.7 KiB
Org Mode
53 lines
1.7 KiB
Org Mode
:PROPERTIES:
|
|
:ID: 8e30289a-0064-4803-ae64-2e1d8285dd3c
|
|
:mtime: 20220530091227
|
|
:ctime: 20220530080115
|
|
:END:
|
|
#+title: Youtube
|
|
|
|
* Introduction
|
|
Est-ce nécessaire ?
|
|
|
|
* Obtenir l'id d'un /channel/ afin de s'abonner à son flux RSS
|
|
#+BEGIN_SRC python :results output verbatim
|
|
# pip install scrapy
|
|
|
|
from scrapy import Spider, Request
|
|
from scrapy.crawler import CrawlerProcess
|
|
from scrapy.utils.log import configure_logging
|
|
|
|
|
|
class YoutubeChannelIdSpider(Spider):
    """Spider that fetches one YouTube channel page and prints its channel ID.

    The page requested is ``https://www.youtube.com/c/<channel>``; the ID is
    read from the ``content`` attribute of the page's
    ``<meta itemprop="channelId">`` element.
    """

    name = "youtube_channel"
    allowed_domains = ['youtube.com']

    def __init__(self, channel, *args, **kwargs):
        """Store the channel name and build the URL of its page.

        Args:
            channel: Channel name as it appears after ``/c/`` in the URL.
            *args: Forwarded unchanged to ``Spider.__init__``.
            **kwargs: Forwarded unchanged to ``Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.channel = channel
        self.url = f'https://www.youtube.com/c/{channel}'

    def start_requests(self):
        """Yield the single request for the channel page."""
        # NOTE(review): presumably this cookie skips YouTube's cookie-consent
        # interstitial so the real channel page is returned -- confirm.
        consent_cookies = {'CONSENT': 'YES+1'}
        yield Request(
            url=self.url,
            callback=self.parse,
            cookies=consent_cookies,
        )

    def parse(self, response):
        """Print the channel ID found in the response, or a failure message.

        Args:
            response: The response for the channel page request.
        """
        found_id = response.xpath('//meta[@itemprop="channelId"]/@content').get()

        # Guard clause: nothing usable was extracted from the page.
        if not found_id:
            print(f'Unable to find ID for channel {self.channel}')
            return

        print(f'{self.channel} channel ID is : "{found_id}"')
|
|
|
|
# Ask the user which channel to look up (the name used after /c/ in the URL).
requested_channel = input("What's the Youtube channel for which the ID shall be retrieved ?")

# Run the spider in-process; the channel name is handed to the spider's
# constructor through the ``channel`` keyword argument.
crawler_process = CrawlerProcess()
crawler_process.crawl(YoutubeChannelIdSpider, channel=requested_channel)

# The script will block here until the crawling is finished.
crawler_process.start()
|
|
#+END_SRC
|
|
|
|
#+RESULTS:
|
|
: AAAAAAAAAAAAAAAAAAAAAAAAA
|
|
: url = 'https://www.youtube.com/c/Matrixdotorg/'
|
|
|
|
* Références
|
|
- [[https://docs.scrapy.org/en/latest/index.html][Docs - Scrapy]]
|
|
- [[https://danielmiessler.com/blog/rss-feed-youtube-channel/][Rss feed youtube channel - Daniel Miessler]]
|
|
- [[https://commentpicker.com/youtube-channel-id.php#youtube-channel-id][Comment Picker]]
|