import asyncio
import re
from datetime import datetime
from logging import getLogger
from os import makedirs
from os.path import exists, join
from typing import Any, Dict, List, Optional, TYPE_CHECKING, Tuple
from aiohttp import ClientError
from .abc import GenericModelList, Model
from .group import Group
from .mixins import DatetimeMixin
from .user import User
from ..constants import invalid_folder_name_regex, routes
from ..utils import copy_key_to_attribute
logger = getLogger(__name__)
if TYPE_CHECKING:
from .manga import Manga
from ..client import MangadexClient
class Chapter(Model, DatetimeMixin):
    """A :class:`.Model` that represents a single chapter of a manga.

    .. versionadded:: 0.3
    """

    volume: Optional[str]
    """The volume the chapter is part of, or ``None`` when the chapter is not assigned to a volume."""

    number: Optional[str]
    """The chapter's number, or ``None`` for un-numbered chapters (for example in an anthology).

    .. note::
        A chapter has a number, a title, or both. When the number is ``None`` the chapter must have a title.
    """

    title: Optional[str]
    """The chapter's title, or ``None`` when the chapter is untitled.

    .. note::
        A chapter has a number, a title, or both. When the title is ``None`` the chapter must have a number.
    """

    language: str
    """The language the chapter is in."""

    hash: str
    """The hash of the chapter."""

    page_names: List[str]
    """The filenames of the chapter's pages.

    .. seealso:: :attr:`.data_saver_page_names`
    """

    data_saver_page_names: List[str]
    """The filenames of the chapter's data saver pages.

    .. seealso:: :attr:`.page_names`
    """

    publish_time: datetime
    """A :class:`datetime.datetime` holding the moment the chapter was published.

    .. seealso:: :attr:`.created_at`

    .. note::
        The datetime is **timezone aware** as it is parsed from an ISO-8601 string.
    """

    manga: "Manga"
    """The manga the chapter is a part of."""

    user: User
    """The user who uploaded the chapter."""

    groups: GenericModelList[Group]
    """The groups who uploaded the chapter."""

    read: bool
    """Whether or not the chapter has been read."""
def __init__(
    self,
    client: "MangadexClient",
    *,
    id: Optional[str] = None,
    version: int = 0,
    data: Optional[Dict[str, Any]] = None,
):
    """Create a chapter bound to ``client``.

    :param client: The client that the chapter uses to make requests.
    :type client: MangadexClient
    :param id: The ID of the chapter, forwarded to the parent initializer.
    :type id: Optional[str]
    :param version: The version of the chapter, forwarded to the parent initializer.
    :type version: int
    :param data: Raw data for the chapter, forwarded to the parent initializer.
    :type data: Optional[Dict[str, Any]]
    """
    # The read flag starts out False and is assigned before the parent initializer runs.
    # NOTE(review): presumably so the attribute already exists while the parent processes ``data`` -- confirm.
    self.read = False
    super().__init__(client, id=id, version=version, data=data)
@property
def name(self) -> str:
    """Build a human-readable name out of the fields that are available.

    When the chapter has a number the result looks like ``Volume <volume> Chapter <number>: <title>``, where the
    volume prefix and the title suffix are left out if the respective field is empty. Without a number the title
    is returned as-is.

    :return: Formatted name
    :rtype: str
    """
    if not self.number:
        return self.title

    if self.number.isdecimal():
        # Pure digit strings are normalised through a float so that e.g. ``05`` is displayed as ``5``.
        as_float = float(self.number)
        shown = int(as_float) if as_float.is_integer() else as_float
    else:
        shown = self.number

    volume_part = f"Volume {self.volume} " if self.volume else ""
    title_part = f": {self.title}" if self.title else ""
    return f"{volume_part}Chapter {shown}{title_part}"
@property
def sorting_number(self) -> float:
    """Returns the chapter's number as a float so that chapters can be sorted numerically.

    Decimal chapter numbers such as ``"10.5"`` are supported. ``-1`` is returned when the chapter does not have a
    number or when the number cannot be interpreted as a finite number, which sorts such chapters before all
    numbered ones.

    :return: A number usable for sorting.
    :rtype: float
    """
    from math import isfinite

    try:
        value = float(self.number)
    except (TypeError, ValueError):
        # ``None`` (no number) raises TypeError, free-form text raises ValueError.
        return -1.0
    # Strings such as "nan" or "inf" parse successfully but would break or distort sorting.
    return value if isfinite(value) else -1.0
async def pages(self, *, data_saver: bool = False, ssl_only: bool = False) -> List[str]:
    """Get fully formatted page URLs.

    .. note::
        The given page URLs are only valid for a short timeframe. These URLs cannot be used for hotlinking.

    :param data_saver: Whether or not to return the pages for the data saver URLs. Defaults to ``False``.
    :type data_saver: bool
    :param ssl_only: Whether or not the given URL has port ``443``. Useful if your firewall blocks outbound
        connections to ports that are not port ``443``. Defaults to ``False``.

        .. note::
            This will lower the pool of available clients and can cause higher latencies.

    :type ssl_only: bool
    :return: A list of valid URLs in the order of the pages.
    :rtype: List[str]
    """
    # The page filenames are only present once the chapter has been fetched.
    if not hasattr(self, "page_names"):
        await self.fetch()
    r = await self.client.request(
        "GET", routes["md@h"].format(chapterId=self.id), params={"forcePort443": ssl_only}
    )
    try:
        base_url = (await r.json())["baseUrl"]
    finally:
        # Always hand the response back, even if the body is not valid JSON or lacks ``baseUrl``.
        r.close()
    quality = "data-saver" if data_saver else "data"
    filenames = self.data_saver_page_names if data_saver else self.page_names
    return [f"{base_url}/{quality}/{self.hash}/{filename}" for filename in filenames]
async def download_chapter(
    self,
    *,
    folder_format: str = "{manga}/{chapter_num}{separator}{title}",
    file_format: str = "{num}",
    as_bytes_list: bool = False,
    overwrite: bool = True,
    retries: int = 3,
    use_data_saver: bool = False,
    ssl_only: bool = False,
) -> Optional[List[bytes]]:
    """Download all of the pages of the chapter and either save them locally to the filesystem or return the raw
    bytes.

    :param folder_format: The format of the folder to create for the chapter. The folder can already be existing.
        The default format is ``{manga}/{chapter_num}{separator}{title}``.

        .. note::
            Specify ``.`` if you want to save the pages in the current folder.

        Available variables:

        * ``{manga}``: The name of the manga. If the chapter's manga object does not contain a title object,
          it will be fetched.
        * ``{chapter_num}``: The number of the chapter, if it exists.
        * ``{separator}``: A separator if both the chapter's number and title exists.
        * ``{title}``: The title of the chapter, if it exists.

    :type folder_format: str
    :param file_format: The format of the individual image file names. The default format is ``{num}``.

        .. note::
            The file extension is applied automatically from the real file name. There is no need to include it.

        Available variables:

        * ``{num}``: The numbering of the image files starting from 1. This respects the order the images are in
          inside of :attr:`.page_names`.
        * ``{num0}``: The same as ``{num}`` but starting from 0.
        * ``{name}``: The actual filename of the image from :attr:`.page_names`, without the file extension.

    :type file_format: str
    :param as_bytes_list: Whether or not to return the pages as a list of raw bytes. Setting this parameter to
        ``True`` will ignore the value of the ``folder_format`` parameter.
    :type as_bytes_list: bool
    :param overwrite: Whether or not to override existing files with the same name as the page. Defaults to
        ``True``.
    :type overwrite: bool
    :param retries: How many times to retry a chapter if a MD@H node does not let us download the pages.
        Defaults to ``3``.
    :type retries: int
    :param use_data_saver: Whether or not to use the data saver pages or the normal pages. Defaults to ``False``.
    :type use_data_saver: bool
    :param ssl_only: Whether or not the given URL has port ``443``. Useful if your firewall blocks outbound
        connections to ports that are not port ``443``. Defaults to ``False``.

        .. note::
            This will lower the pool of available clients and can cause higher download times.

    :type ssl_only: bool
    :raises: :class:`aiohttp.ClientError` if there is an error after all retries are exhausted.
    :return: A list of byte strings if ``as_bytes_list`` is ``True`` else None.
    :rtype: Optional[List[bytes]]
    """
    if not hasattr(self, "page_names"):
        await self.fetch()

    attempts_left = retries
    while True:
        # The page URLs are requested again on every attempt so that a retry gets a fresh set of URLs.
        pages = await self.pages(data_saver=use_data_saver, ssl_only=ssl_only)
        try:
            items = await asyncio.gather(*[self.client.get_page(url) for url in pages])
        except ClientError as e:
            if attempts_left <= 0:
                raise
            attempts_left -= 1
            logger.warning("Retrying download of chapter %s due to %s: %s", self.id, type(e).__name__, e)
        else:
            break

    try:
        byte_list = await asyncio.gather(*[item.read() for item in items])
    finally:
        # Close every response even when reading one of the bodies fails.
        for item in items:
            item.close()

    if as_bytes_list:
        return byte_list  # NOQA: ignore; asyncio.gather()'s return type is not inferred correctly by PyCharm

    def sanitize(raw: str) -> str:
        # Replace invalid characters with underscores, then collapse runs of underscores into one. This means
        # that a name of ``ex___ample`` becomes ``ex_ample``.
        return re.sub("_{2,}", "_", invalid_folder_name_regex.sub("_", raw.strip()))

    chapter_num = self.number or ""
    separator = " - " if self.number and self.title else ""
    title = sanitize(self.title) if self.title else ""
    if not self.manga.titles:
        await self.manga.fetch()
    manga_title = self.manga.titles[self.language].primary or (
        self.manga.titles.first().primary if self.manga.titles else self.manga.id
    )
    base = folder_format.format(
        manga=sanitize(manga_title), chapter_num=chapter_num, separator=separator, title=title
    )
    if base:
        # ``makedirs("")`` raises, and an empty path already refers to the current folder when joined.
        makedirs(base, exist_ok=True)

    source_names = self.data_saver_page_names if use_data_saver else self.page_names
    for num, (original_file_name, item) in enumerate(zip(source_names, byte_list), start=1):
        stem, dot, extension = original_file_name.rpartition(".")
        if not dot:
            # No extension in the original name: the whole name is the stem.
            stem, extension = original_file_name, ""
        filename = file_format.format(num=num, num0=num - 1, name=stem)
        if extension:
            filename += "." + extension
        full_path = join(base, filename)
        # Existing files are only replaced when the caller asked for overwriting.
        if overwrite or not exists(full_path):
            with open(full_path, "wb") as fp:
                fp.write(item)
    return None
@staticmethod
def _get_number_from_chapter_string(chapter_str: str) -> Tuple[Optional[float], Optional[str]]:
    """Split a raw chapter string into a numeric chapter number and an optional title.

    :param chapter_str: The raw chapter string. May be empty or ``None``.
    :type chapter_str: str
    :return: A ``(number, title)`` tuple. A clean number such as ``"12"`` or ``"1.5"`` gives ``(number, None)``.
        Anything else gives the first number found inside of the string (or ``None`` if there is none) together
        with the original string as the title. An empty string gives ``(None, None)``.
    :rtype: Tuple[Optional[float], Optional[str]]
    """
    if not chapter_str:
        return None, None
    # One well-formed number: digits with an optional fractional part, or a bare fraction such as ".5". A plain
    # ``[\d.]+`` would also match a lone "." (as in "Ch. 5") or "1.2.3", both of which make ``float()`` raise.
    number_pattern = r"\d+(?:\.\d+)?|\.\d+"
    if re.fullmatch(number_pattern, chapter_str):
        return float(chapter_str), None
    # Unfortunately for us some people decided to enter in garbage data, which means that we cannot cleanly
    # convert to a float. Attempt to try to get something vaguely resembling a number or return a null
    # chapter number and set the title as the value for the chapter number.
    match = re.search(number_pattern, chapter_str)
    return (float(match.group(0)) if match else None), chapter_str
def parse(self, data: Dict[str, Any]):
    """Populate the chapter from a raw response dictionary.

    The inherited parsing runs first. Afterwards the entries of ``data["data"]["attributes"]`` (when present) are
    copied onto the chapter, the relationships are parsed, and the temporary ``_users`` / ``mangas`` lists are
    collapsed into the singular :attr:`.user` and :attr:`.manga` attributes.

    :param data: The raw data to parse.
    :type data: Dict[str, Any]
    """
    super().parse(data)

    if "data" in data and "attributes" in data["data"]:
        attributes = data["data"]["attributes"]
        # (key inside of the attributes, attribute name on the chapter). ``None`` means that no explicit
        # attribute name is handed to the helper.
        field_table = (
            ("volume", None),
            ("title", None),
            ("chapter", "number"),
            ("translatedLanguage", "language"),
            ("hash", None),
            ("data", "page_names"),
            ("dataSaver", "data_saver_page_names"),
        )
        for key, attribute_name in field_table:
            if attribute_name is None:
                copy_key_to_attribute(attributes, key, self)
            else:
                copy_key_to_attribute(attributes, key, self, attribute_name)
        self._process_times(attributes)

    self._parse_relationships(data)

    if hasattr(self, "_users"):
        # Only the first entry is kept as the uploader; the temporary list is removed afterwards.
        self.user = self._users[0]
        del self._users

    if hasattr(self, "mangas"):
        # This is needed to move the list of Mangas created by the parse_relationships function into a
        # singular manga, since there can never be >1 manga per chapter.
        self.manga = self.mangas[0]
        del self.mangas
def _process_times(self, attributes: Dict[str, str]):
    """Run the inherited time processing, then copy ``publishAt`` into :attr:`.publish_time`.

    :param attributes: The attributes dictionary that contains the raw time strings.
    :type attributes: Dict[str, str]
    """
    super()._process_times(attributes)

    def to_datetime(raw):
        # Falsy values are handed back untouched instead of being parsed.
        return datetime.fromisoformat(raw) if raw else raw

    copy_key_to_attribute(attributes, "publishAt", self, "publish_time", transformation=to_datetime)
async def fetch(self):
    """Fetch data about the chapter. |permission| ``chapter.view``

    :raises: :class:`.InvalidID` if a chapter with the ID does not exist.
    """
    permission, route_name = "chapter.view", "chapter"
    await self._fetch(permission, route_name)
async def load_groups(self):
    """Shortcut method that calls :meth:`.MangadexClient.batch_groups` with the groups of the chapter.

    Roughly equivalent to:

    .. code-block:: python

        await client.batch_groups(*chapter.groups)
    """
    chapter_groups = self.groups
    await self.client.batch_groups(*chapter_groups)
async def mark_read(self):
    """Mark the chapter as read. |auth|

    Sends a ``POST`` request to the ``read`` route and sets :attr:`.read` to ``True`` afterwards.

    .. versionadded:: 0.5
    """
    # Report the HTTP method that is actually used for the request, not "GET".
    self.client.raise_exception_if_not_authenticated("POST", routes["read"])
    r = await self.client.request("POST", routes["read"].format(id=self.id))
    self.read = True
    r.close()
async def mark_unread(self):
    """Mark the chapter as unread. |auth|

    Sends a ``DELETE`` request to the ``read`` route and sets :attr:`.read` to ``False`` afterwards.

    .. versionadded:: 0.5
    """
    # Report the HTTP method that is actually used for the request, not "GET".
    self.client.raise_exception_if_not_authenticated("DELETE", routes["read"])
    r = await self.client.request("DELETE", routes["read"].format(id=self.id))
    self.read = False
    r.close()
async def toggle_read(self):
    """Flip the read state of the chapter: a read chapter becomes unread and vice versa. Requires authentication.

    .. versionadded:: 0.5

    .. note::
        The decision is based on the current value of :attr:`.read`, so the read status of the chapter has to be
        known beforehand. See :meth:`.get_read` or :meth:`.ChapterList.get_read`.

    :raises: :class:`.Unauthorized` if authentication is missing.
    """
    action = self.mark_unread if self.read else self.mark_read
    await action()
async def get_read(self):
    """Gets whether or not the chapter is read. The read status can then be viewed in :attr:`.read`.

    The read markers of the chapter's manga are requested, and the chapter counts as read when its ID is
    contained in the ``data`` entry of the response.

    .. versionadded:: 0.5
    """
    r = await self.client.request("GET", routes["manga_read"].format(id=self.manga.id))
    try:
        self.manga._check_404(r)
        payload = await r.json()
    finally:
        # Close the response even when the 404 check or the JSON decoding raises.
        r.close()
    self.read = self.id in payload["data"]