This commit is contained in:
2025-05-11 18:58:43 +02:00
parent d5a73f342e
commit f3f3045ebf
48 changed files with 686 additions and 243 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,62 +0,0 @@
from pathlib import Path
from typing import TYPE_CHECKING
from bs4 import BeautifulSoup
import httpx
import requests
from data.path_constant import DOWNLOAD_DIR
from data.webtoon_request import get_bomtoon_headers
from downloaders.downloader import Downloader
class Bomtoon(Downloader):
    """Downloader for a Bomtoon title; only the metadata scrape is implemented.

    The episode-listing and image-download hooks are empty placeholders.
    """

    def __init__(self, webtoon_id):
        """Initialise the base downloader and build the Bomtoon request headers."""
        super().__init__(webtoon_id)
        self.headers = get_bomtoon_headers()

    def _fetch_information(self, url):
        """Scrape title, author, description and tag from the page at ``url``.

        Each attribute is assigned only when its source element is found in
        the page. On a non-200 response the status code is printed and no
        attribute is touched.
        """
        res = requests.get(url, headers=self.headers)
        if res.status_code != 200:
            print(f"fetch_information: {res.status_code}")
            return

        soup = BeautifulSoup(res.content, 'html.parser')

        title = soup.find('title')
        if title:
            # Keep only the text in front of the first '-' of the <title>.
            self.title = title.get_text().split('-')[0].strip()

        author = soup.find('meta', attrs={'name': 'author'})
        if author:
            self.author = author.get('content')

        description = soup.find('meta', attrs={'property': 'og:description'})
        if description:
            self.description = description.get('content')

        tags = soup.find('meta', attrs={'name': 'keywords'})
        # A keywords <meta> without a ``content`` attribute yields None here;
        # skip it instead of failing on ``None.split``.
        keywords = tags.get('content') if tags else None
        if keywords is not None:
            tags_list = keywords.split(',')
            # When the first keyword contains '連載' the second one is used as
            # the tag (presumably '連載' is a status marker, not a genre --
            # TODO confirm). Fall back to the first keyword when there is no
            # second entry rather than raising IndexError.
            if '連載' in tags_list[0] and len(tags_list) > 1:
                self.tag = tags_list[1]
            else:
                self.tag = tags_list[0]

        # No thumbnail URL is scraped; the file name is derived from the id.
        self.thumbnail_url = ""
        self.thumbnail_name = self.webtoon_id + '.jpg'

    def _fetch_episode_information(self):
        """Placeholder: episode metadata retrieval is not implemented."""

    def _get_episode_image_urls(self, episode_index) -> list[str]:
        """Placeholder: not implemented, so this returns None despite the annotation."""

    async def _download_image(
        self,
        episode_path: Path,
        url: str,
        image_no: int
    ) -> None:
        """Placeholder: image download is not implemented."""

View File

@@ -1,8 +1,7 @@
import base64
import hashlib
from contextlib import suppress
from WebtoonScraper.exceptions import MissingOptionalDependencyError
# from Cryptodome.Cipher import AES
class Decrypt :
def __init__(self, aid, episodeId, timestamp, nonce, userId, zid):
@@ -18,7 +17,7 @@ class Decrypt :
with suppress(AttributeError):
return cls.AES
try:
from Cryptodome.Cipher import AES
from Crypto.Cipher import AES
except ImportError:
raise ImportError("Missing optional dependency 'pycryptodomex'. Please install it to use this functionality.")

View File

@@ -2,7 +2,7 @@ import asyncio
import html
import json
from pathlib import Path
import pyfilename as pf
#import pyfilename as pf
import shutil
import time
from httpx import AsyncClient
@@ -53,11 +53,13 @@ class Downloader:
if information_path.exists():
with open(information_path, "r", encoding='utf-8') as json_file:
existing_information = json.load(json_file)
if (self.author == ""):
save_necessary = False
if (
existing_information["title"] == self.title and
existing_information["author"] == self.author and
existing_information["description"] == self.description and
existing_information["thumbnail_name"] == self.thumbnail_name
existing_information["thumbnail"] == self.thumbnail_name
):
save_necessary = False
if (save_necessary):
@@ -66,7 +68,7 @@ class Downloader:
"author": self.author,
"tag": self.tag,
"description": self.description,
"thumbnail_name": self.thumbnail_name
"thumbnail": self.thumbnail_name
}
with open(information_path, 'w', encoding='utf-8') as json_file:
@@ -91,10 +93,15 @@ class Downloader:
def _get_unobtained_episodes(self) -> list[int]:
downloaded_episodes = []
for dir in self.webtoon_path.glob('*'):
if dir.is_dir():
downloaded_episodes.append(int(dir.name.split('.')[0]))
if self.title == "反派角色只有死亡結局":
downloaded_episodes = [i - 2 for i in downloaded_episodes]
if self.title == "成為我筆下男主角的妻子" or self.title == "領主夫人罷工中":
downloaded_episodes = [i - 1 for i in downloaded_episodes]
if self.title in WEBTOON_18_BONUS:
count = len(self.readablities_index_list) - len(downloaded_episodes)
@@ -114,6 +121,10 @@ class Downloader:
for episode_index in episode_index_list:
episode_name = self.episode_titles[episode_index]
episode_title = self._get_safe_file_name(episode_index, episode_name)
if self.title == "反派角色只有死亡結局":
episode_title = self._get_safe_file_name(episode_index + 2, episode_name)
if self.title == "成為我筆下男主角的妻子" or self.title == "領主夫人罷工中":
episode_title = self._get_safe_file_name(episode_index + 1, episode_name)
# episode_title = self._get_safe_file_name(f"{episode_index}.{self.episode_titles[episode_index]}")
print(episode_title)
episode_path = self.webtoon_path / episode_title
@@ -165,4 +176,4 @@ class Downloader:
episode_title = f"{episode_index}.{episode_name}"
return pf.convert(html.unescape(episode_title))
return html.unescape(episode_title)

View File

@@ -10,6 +10,7 @@ import requests
from data.path_constant import DOWNLOAD_DIR, DOWNLOAD_LIST_TXT
from data.kakao_cookie import Cookie
from data.kakao_request import KakaoRequest
from data.special_list import KAKAO_TO_TW
from downloaders.decrypt import Decrypt
from downloaders.downloader import Downloader
@@ -27,7 +28,7 @@ class KakaoWebtoon(Downloader):
self.post_headers = self.kakaoRequest.get_post_headers(self.cookie.ant)
def verify_cookie(self) -> bool:
    """Return True when the episode-list endpoint answers HTTP 200.

    Sends one GET for the first page of this webtoon's episodes using
    ``self.episode_headers``; any other status code yields False.
    """
    # The earlier assignment to the gateway.tw.kakaowebtoon.com host was
    # overwritten before use, so only the effective URL is kept.
    # NOTE(review): the other requests in this file target
    # gateway-kw.kakao.com -- confirm this host is the intended one.
    url = f"https://gateway.webtoon.kakao.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset=0&limit=30"
    res = requests.get(url, headers=self.episode_headers)
    return res.status_code == 200
@@ -39,15 +40,22 @@ class KakaoWebtoon(Downloader):
description = soup.find('meta', attrs={'name': 'description'})
if description:
self.description = description.get('content')
self.description = ""
thumbnail_url = soup.find('meta', attrs={'property': 'og:image'})
if thumbnail_url:
self.thumbnail_url = thumbnail_url.get('content')
all_p = soup.find_all('p')
self.title = all_p[0].get_text()
self.author = all_p[1].get_text()
self.tag = all_p[2].get_text()
title = all_p[0].get_text()
if title in KAKAO_TO_TW:
self.title = KAKAO_TO_TW.get(title)
self.author = ""
self.tag = ""
else:
self.title = title
self.author = all_p[1].get_text()
self.tag = all_p[2].get_text()
self.thumbnail_name = self.webtoon_id + '.' + self.thumbnail_url.split('.')[-1]
def _fetch_episode_information(self):
@@ -56,7 +64,7 @@ class KakaoWebtoon(Downloader):
is_last: bool = False
webtoon_episodes_data = []
while not is_last:
url = f"https://gateway.tw.kakaowebtoon.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset={offset}&limit={limit}"
url = f"https://gateway-kw.kakao.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset={offset}&limit={limit}"
res = requests.get(url, headers=self.episode_headers)
if res.status_code == 200:
json_data = res.json()
@@ -88,15 +96,15 @@ class KakaoWebtoon(Downloader):
self.episode_ids = episode_ids
self.seo_ids = seo_ids
self.episode_titles = episode_titles
self.readablities_index_list = [index for index, value in enumerate(readablities) if value == True]
self.readablities_index_list = [index for index, value in enumerate(readablities) if value == True]
def _get_episode_image_urls(self, episode_index) -> list[tuple[str, bytes, bytes]] | None:
episode_id = self.episode_ids[episode_index]
url = f"https://gateway.tw.kakaowebtoon.com/episode/v1/views/viewer/episodes/{episode_id}/media-resources"
url = f"https://gateway-kw.kakao.com/episode/v1/views/viewer/episodes/{episode_id}/media-resources"
payload = self.kakaoRequest.get_payload(episode_id)
res = requests.post(url, headers=self.post_headers, json=payload)
data = res.json()["data"]
aid = data["media"]["aid"]