Initial commit: Add webtoon downloader
This commit is contained in:
141
downloaders/kakao_webtoon.py
Normal file
141
downloaders/kakao_webtoon.py
Normal file
@@ -0,0 +1,141 @@
|
||||
from pathlib import Path
|
||||
import random
|
||||
import sys
|
||||
import time
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from httpx import AsyncClient, RequestError, HTTPStatusError
|
||||
import httpx
|
||||
import requests
|
||||
from data.path_constant import DOWNLOAD_DIR, DOWNLOAD_LIST_TXT
|
||||
from data.kakao_cookie import Cookie
|
||||
from data.kakao_request import KakaoRequest
|
||||
from downloaders.decrypt import Decrypt
|
||||
from downloaders.downloader import Downloader
|
||||
|
||||
|
||||
class KakaoWebtoon(Downloader):
    """Downloader for titles served through gateway.tw.kakaowebtoon.com.

    Each instance generates one millisecond timestamp and one 10-character
    nonce; both are handed to ``KakaoRequest`` (to build headers/payloads)
    and later to ``Decrypt`` (to derive the per-episode key and IV).

    NOTE(review): ``self.client`` (an async HTTP client) is used but not
    created in this class -- presumably ``Downloader`` provides it; confirm.
    """

    def __init__(self, webtoon_id: int, cookie: Cookie):
        """Prepare request headers for ``webtoon_id`` using ``cookie``.

        Args:
            webtoon_id: Identifier of the title, forwarded to ``Downloader``.
            cookie: Project cookie object; its ``ant`` attribute is used to
                build the headers here, and ``userID`` / ``name`` are read by
                other methods.
        """
        super().__init__(webtoon_id)
        self._timestamp = int(time.time() * 1000)
        # Code points for '0'-'9' and 'a'-'z'.
        chars = [*range(0x30, 0x3A), *range(0x61, 0x7B)]
        self._nonce = "".join(chr(i) for i in random.choices(chars, k=10))

        self.kakaoRequest = KakaoRequest(self._timestamp, self._nonce)
        self.cookie = cookie
        self.episode_headers = self.kakaoRequest.get_episode_headers(self.cookie.ant)
        self.post_headers = self.kakaoRequest.get_post_headers(self.cookie.ant)

    def verify_cookie(self) -> bool:
        """Return True when the episode-list endpoint answers 200 with the current headers."""
        url = f"https://gateway.tw.kakaowebtoon.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset=0&limit=30"
        res = requests.get(url, headers=self.episode_headers)
        return res.status_code == 200

    def _fetch_information(self, url) -> None:
        """Scrape title metadata from the HTML page at ``url``.

        On a 200 response this sets ``title``, ``author``, ``tag`` (text of
        the first three ``<p>`` elements), ``thumbnail_name`` and, when the
        matching ``<meta>`` tags exist, ``description`` and ``thumbnail_url``.
        Any other status leaves the instance untouched.

        Raises:
            IndexError: If the page has fewer than three ``<p>`` elements.
        """
        res = requests.get(url, headers=self.episode_headers)
        if res.status_code != 200:
            return

        soup = BeautifulSoup(res.content, 'html.parser')

        description = soup.find('meta', attrs={'name': 'description'})
        if description:
            self.description = description.get('content')

        thumbnail_url = soup.find('meta', attrs={'property': 'og:image'})
        if thumbnail_url:
            self.thumbnail_url = thumbnail_url.get('content')

        all_p = soup.find_all('p')
        self.title = all_p[0].get_text()
        self.author = all_p[1].get_text()
        self.tag = all_p[2].get_text()

        # f-string instead of ``+``: webtoon_id is annotated as int, and
        # ``int + str`` raises TypeError. This form works for int and str.
        # NOTE(review): relies on self.thumbnail_url being set, either above
        # or by the base class -- confirm behaviour when og:image is missing.
        extension = self.thumbnail_url.split('.')[-1]
        self.thumbnail_name = f"{self.webtoon_id}.{extension}"

    def _fetch_episode_information(self) -> None:
        """Page through the episode list and store it on the instance.

        Requests pages of 30 episodes until the response's
        ``meta.pagination.last`` flag is true, then stores ``episode_ids``,
        ``seo_ids``, ``episode_titles`` and ``readablities_index_list`` (the
        indices of episodes whose ``readable`` flag is truthy). All four are
        in the reverse of the order the API returned.

        A non-200 response prints diagnostics and terminates the process via
        ``sys.exit()``.
        """
        offset = 0
        limit = 30
        is_last: bool = False
        webtoon_episodes_data = []

        while not is_last:
            url = f"https://gateway.tw.kakaowebtoon.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset={offset}&limit={limit}"
            res = requests.get(url, headers=self.episode_headers)
            if res.status_code != 200:
                print("_fetch_episode_information")
                print(self.cookie.name)
                print(res.status_code)
                sys.exit()

            json_data = res.json()
            webtoon_episodes_data += json_data["data"]["episodes"]
            offset += limit
            is_last = json_data["meta"]["pagination"]["last"]

        # The request uses sort=-NO (presumably newest first -- confirm);
        # reversing flips that order for the stored lists.
        ordered = webtoon_episodes_data[::-1]

        # Build every list before assigning so a missing key leaves the
        # instance attributes untouched.
        episode_ids: list[int] = [info["id"] for info in ordered]
        seo_ids: list[str] = [info["seoId"] for info in ordered]
        episode_titles: list[str] = [info["title"] for info in ordered]
        readable_indexes: list[int] = [
            index for index, info in enumerate(ordered) if info["readable"]
        ]

        self.episode_ids = episode_ids
        self.seo_ids = seo_ids
        self.episode_titles = episode_titles
        self.readablities_index_list = readable_indexes

    def _get_episode_image_urls(self, episode_index) -> list[tuple[str, bytes, bytes]] | None:
        """Return ``(url, key, iv)`` for every image file of one episode.

        Args:
            episode_index: Index into ``self.episode_ids``.

        Side effect: replaces ``self.decrypt`` with a ``Decrypt`` instance
        built for this episode; ``_download_image`` uses it afterwards.
        """
        episode_id = self.episode_ids[episode_index]

        url = f"https://gateway.tw.kakaowebtoon.com/episode/v1/views/viewer/episodes/{episode_id}/media-resources"
        payload = self.kakaoRequest.get_payload(episode_id)
        res = requests.post(url, headers=self.post_headers, json=payload)

        data = res.json()["data"]
        media = data["media"]
        aid = media["aid"]
        zid = media["zid"]

        self.decrypt = Decrypt(aid, episode_id, self._timestamp, self._nonce, self.cookie.userID, zid)
        key, iv = self.decrypt.get_decrypt_infomations()

        return [(i["url"], key, iv) for i in media["files"]]

    async def _download_image(
        self,
        episode_path: Path,
        url: tuple[str, bytes, bytes],
        image_no: int
    ) -> None:
        """Download one image, decrypt it and write it as ``NNN.webp``.

        Args:
            episode_path: Directory the image file is written into.
            url: ``(real_url, key, iv)`` as produced by
                ``_get_episode_image_urls``.
            image_no: Image number; zero-padded to three digits for the
                file name.

        If the download fails, the error is printed and the image is skipped;
        nothing is written for it.
        """
        real_url, key, iv = url
        file_extension = 'webp'
        file_name = f"{image_no:03d}.{file_extension}"
        file_path = episode_path / file_name

        try:
            response = await self.client.get(real_url, headers=self.episode_headers)
            # Without this, HTTPStatusError is never raised and an error page
            # would be decrypted and saved as if it were an image.
            response.raise_for_status()
            image_raw: bytes = response.content
        # TimeoutException and UnsupportedProtocol are subclasses of
        # RequestError, so they must be listed before it to be reachable.
        except httpx.TimeoutException as e:
            print(f"Timeout error occurred: {e}")
            return
        except httpx.UnsupportedProtocol as e:
            print(f"Unsupported protocol error occurred: {e}")
            return
        except httpx.RequestError as e:
            # Print only the URL: the full tuple also carries the key and IV.
            print(f"An error occurred while requesting {real_url}: {e}")
            return
        except httpx.HTTPStatusError as e:
            print(f"HTTP error occurred: {e}")
            return
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return

        decrypted_data = self.decrypt._decrypt(image_raw, key, iv)

        file_path.write_bytes(decrypted_data)

    async def close(self):
        """Close the async HTTP client held in ``self.client``."""
        await self.client.aclose()
|
||||
Reference in New Issue
Block a user