# ffx/bin/ffx/file_properties.py

import os, re, click, json

from .media_descriptor import MediaDescriptor
from .pattern_controller import PatternController

from .process import executeProcess

from ffx.model.pattern import Pattern
from ffx.ffx_controller import FfxController
from ffx.show_descriptor import ShowDescriptor


class FileProperties():
    """Properties derived from a single media source file.

    On construction the source path is split into directory, filename,
    basename and extension, the filename is handed to
    PatternController.matchFilename(), and season/episode numbers are parsed
    either from the first group of the returned match or, when nothing
    matched, from the filename itself.

    ffprobe is only invoked on demand by getFormatData(), getStreamData()
    and getMediaDescriptor().
    """

    # Filename extensions (without dot, compared case-sensitively) that are
    # split off the filename to form the basename.
    FILE_EXTENSIONS = ['mkv', 'mp4', 'avi', 'flv', 'webm']

    SE_INDICATOR_PATTERN = '([sS][0-9]+[eE][0-9]+)'
    SEASON_EPISODE_INDICATOR_MATCH = '[sS]([0-9]+)[eE]([0-9]+)'
    EPISODE_INDICATOR_MATCH = '[eE]([0-9]+)'

    DEFAULT_INDEX_DIGITS = 3

    def __init__(self, context, sourcePath):
        """Analyse the path and filename of a source file.

        Args:
            context: Mapping holding at least a 'logger' entry; it is also
                passed on to PatternController and MediaDescriptor.
            sourcePath: Path of the source media file.
        """

        self.context = context

        self.__logger = context['logger']

        # Separate basedir, basename and extension for current source file
        self.__sourcePath = sourcePath

        self.__sourceDirectory = os.path.dirname(self.__sourcePath)
        self.__sourceFilename = os.path.basename(self.__sourcePath)

        # Only a known extension that follows an actual dot is split off; a
        # dot-less name such as 'mkv' is kept whole as the basename.
        stem, dot, suffix = self.__sourceFilename.rpartition('.')

        if dot and suffix in FileProperties.FILE_EXTENSIONS:
            self.__sourceFileBasename = stem
            self.__sourceFilenameExtension = suffix
        else:
            self.__sourceFileBasename = self.__sourceFilename
            self.__sourceFilenameExtension = ''

        self.__pc = PatternController(context)

        # Example database pattern from the original author's note:
        #   boruto_[sS]([0-9]+)[eE]([0-9]+).mkv
        # NOTE(review): with that pattern the first group would hold only the
        # season digits, which neither indicator regex below matches --
        # confirm the shape of the patterns actually stored.

        # Checking if database contains matching pattern
        matchResult = self.__pc.matchFilename(self.__sourceFilename)

        self.__logger.debug(f"FileProperties.__init__(): Match result: {matchResult}")

        self.__pattern: Pattern = matchResult['pattern'] if matchResult else None

        if matchResult:
            databaseMatchedGroups = matchResult['match'].groups()
            self.__logger.debug(f"FileProperties.__init__(): Matched groups: {databaseMatchedGroups}")

            # The first capture group is taken as the indicator. A pattern
            # without groups, or whose first group did not participate in the
            # match, is treated as "no indicator" instead of raising
            # IndexError/TypeError.
            seIndicator = databaseMatchedGroups[0] if databaseMatchedGroups else None
            indicatorSource = seIndicator or ''

        else:
            self.__logger.debug(f"FileProperties.__init__(): Checking file name for indicator {self.__sourceFilename}")

            indicatorSource = self.__sourceFilename

        self.__season, self.__episode = FileProperties.__parseIndicator(indicatorSource)


    @staticmethod
    def __parseIndicator(text):
        """Return (season, episode) parsed from text; -1 marks a missing part.

        A combined season/episode indicator (e.g. 's01e02') takes precedence
        over an episode-only indicator (e.g. 'e02').
        """
        se_match = re.search(FileProperties.SEASON_EPISODE_INDICATOR_MATCH, text)
        if se_match is not None:
            return int(se_match.group(1)), int(se_match.group(2))

        e_match = re.search(FileProperties.EPISODE_INDICATOR_MATCH, text)
        if e_match is not None:
            return -1, int(e_match.group(1))

        return -1, -1


    def __runFfprobe(self, showOption, resultKey):
        """Run ffprobe on the source file and return one section of its JSON output.

        Args:
            showOption: ffprobe selector option, e.g. '-show_format'.
            resultKey: Top-level key of the JSON output to return.

        Raises:
            Exception: If ffprobe reports invalid input data or exits with a
                non-zero return code.
        """
        ffprobeOutput, ffprobeError, returnCode = executeProcess(["ffprobe",
                                                        "-hide_banner",
                                                        showOption,
                                                        "-of", "json",
                                                        self.__sourcePath])

        # Checked before the return code so that this case gets the more
        # specific message.
        if 'Invalid data found when processing input' in ffprobeError:
            raise Exception(f"File {self.__sourcePath} does not contain valid stream data")

        if returnCode != 0:
            raise Exception(f"ffprobe returned with error {returnCode}")

        return json.loads(ffprobeOutput)[resultKey]


    def getFormatData(self):
        """Return the 'format' section of the ffprobe JSON output, for example

        "format": {
            "filename": "Downloads/nagatoro_s02/nagatoro_s01e02.mkv",
            "nb_streams": 18,
            "nb_programs": 0,
            "nb_stream_groups": 0,
            "format_name": "matroska,webm",
            "format_long_name": "Matroska / WebM",
            "start_time": "0.000000",
            "duration": "1420.063000",
            "size": "1489169824",
            "bit_rate": "8389316",
            "probe_score": 100,
            "tags": {
                "PUBLISHER": "Crunchyroll",
                "ENCODER": "Lavf58.29.100"
            }
        }

        Raises:
            Exception: If ffprobe reports invalid input data or exits with a
                non-zero return code.
        """
        return self.__runFfprobe("-show_format", 'format')

    # Abbreviated examples of ffprobe stream records for other codec types
    # (a complete subtitle record is shown in the getStreamData docstring):
    #
    # video:    {'index': 0, 'codec_name': 'vp9', 'codec_long_name': 'Google VP9',
    #            'profile': 'Profile 0', 'codec_type': 'video',
    #            'width': 1920, 'height': 1080, 'pix_fmt': 'yuv420p',
    #            'r_frame_rate': '24000/1001', 'disposition': {'default': 1, ...},
    #            'tags': {'BPS': '7974017', 'ENCODER': 'Lavc61.3.100 libvpx-vp9',
    #                     'DURATION': '00:23:54.016000000', ...}}
    #
    # audio:    {'index': 1, 'codec_name': 'opus', 'codec_type': 'audio',
    #            'sample_fmt': 'fltp', 'sample_rate': '48000', 'channels': 2,
    #            'channel_layout': 'stereo', 'disposition': {'default': 1, ...},
    #            'tags': {'language': 'jpn', 'title': 'Japanisch',
    #                     'ENCODER': 'Lavc61.3.100 libopus', ...}}
    #
    # subtitle: {'index': 2, 'codec_name': 'webvtt', 'codec_long_name': 'WebVTT subtitle',
    #            'codec_type': 'subtitle', 'disposition': {'default': 1, ...},
    #            'tags': {'language': 'ger', 'title': 'Deutsch [Full]',
    #                     'ENCODER': 'Lavc61.3.100 webvtt', ...}}


    def getStreamData(self):
        """Returns ffprobe stream data as array with elements according to the following example
        {
            "index": 4,
            "codec_name": "hdmv_pgs_subtitle",
            "codec_long_name": "HDMV Presentation Graphic Stream subtitles",
            "codec_type": "subtitle",
            "codec_tag_string": "[0][0][0][0]",
            "codec_tag": "0x0000",
            "r_frame_rate": "0/0",
            "avg_frame_rate": "0/0",
            "time_base": "1/1000",
            "start_pts": 0,
            "start_time": "0.000000",
            "duration_ts": 1421035,
            "duration": "1421.035000",
            "disposition": {
                "default": 1,
                "dub": 0,
                "original": 0,
                "comment": 0,
                "lyrics": 0,
                "karaoke": 0,
                "forced": 0,
                "hearing_impaired": 0,
                "visual_impaired": 0,
                "clean_effects": 0,
                "attached_pic": 0,
                "timed_thumbnails": 0,
                "non_diegetic": 0,
                "captions": 0,
                "descriptions": 0,
                "metadata": 0,
                "dependent": 0,
                "still_image": 0
            },
            "tags": {
                "language": "ger",
                "title": "German Full"
            }
        }

        Raises:
            Exception: If ffprobe reports invalid input data or exits with a
                non-zero return code.
        """
        return self.__runFfprobe("-show_streams", 'streams')


    def getMediaDescriptor(self):
        """Build a MediaDescriptor from the ffprobe format and stream data."""
        return MediaDescriptor.fromFfprobe(self.context, self.getFormatData(), self.getStreamData())


    def getShowId(self) -> int:
        """Result is -1 if the filename did not match anything in database"""
        return self.__pattern.getShowId() if self.__pattern is not None else -1

    def getPattern(self) -> 'Pattern':
        """Result is None if the filename did not match anything in database"""
        return self.__pattern


    def getSeason(self):
        """Return the parsed season number, or -1 if none was found."""
        return int(self.__season)

    def getEpisode(self):
        """Return the parsed episode number, or -1 if none was found."""
        return int(self.__episode)


    def getFilename(self):
        """Return the source filename without its directory."""
        return self.__sourceFilename

    def getFileBasename(self):
        """Return the source filename with a known extension stripped."""
        return self.__sourceFileBasename


    def __getIndicatorDigits(self):
        """Return (season_digits, episode_digits) used to zero-pad indicators.

        Values are read from context['show_descriptor'] when that entry
        exists, otherwise the ShowDescriptor defaults are used.
        """
        if 'show_descriptor' in self.context:
            showDescriptor = self.context['show_descriptor']
            return (showDescriptor[ShowDescriptor.INDICATOR_SEASON_DIGITS_KEY],
                    showDescriptor[ShowDescriptor.INDICATOR_EPISODE_DIGITS_KEY])

        return (ShowDescriptor.DEFAULT_INDICATOR_SEASON_DIGITS,
                ShowDescriptor.DEFAULT_INDICATOR_EPISODE_DIGITS)


    def assembleTargetFileBasename(self,
                                label: str = "",
                                quality: int = -1,
                                fileIndex: int = -1,
                                indexDigits: int = DEFAULT_INDEX_DIGITS,
                                extraTokens: list = None):
        """Assemble the target file basename (without extension).

        Tokens are joined with '_' in this order:
          1. label, or the source file basename when label is empty
          2. only with a label: the zero-padded fileIndex if it is > -1,
             otherwise the season/episode indicator (SxxEyy or Eyy) if an
             episode was parsed from the source filename
          3. 'q<quality>' if quality is not -1
          4. every entry of extraTokens

        Args:
            label: Name to use in place of the source file basename.
            quality: Quality value appended as 'q<quality>'; -1 omits it.
            fileIndex: Index used instead of the season/episode indicator;
                -1 disables it.
            indexDigits: Zero-padded width of fileIndex.
            extraTokens: Additional string tokens appended at the end;
                None (the default) means no extra tokens.

        Returns:
            The assembled basename as a string.
        """

        targetFilenameTokens = []

        self.__logger.debug(f"assembleTargetFileBasename(): label={label} is {'truthy' if label else 'falsy'}")

        if label:

            targetFilenameTokens.append(label)

            if fileIndex > -1:
                targetFilenameTokens.append(f"{fileIndex:0{indexDigits}d}")
            elif self.__episode > -1:
                # Digit widths are only looked up when an indicator token is
                # actually emitted.
                seasonDigits, episodeDigits = self.__getIndicatorDigits()
                episodeToken = f"E{self.__episode:0{episodeDigits}d}"

                if self.__season > -1:
                    targetFilenameTokens.append(f"S{self.__season:0{seasonDigits}d}{episodeToken}")
                else:
                    targetFilenameTokens.append(episodeToken)

        else:
            targetFilenameTokens.append(self.__sourceFileBasename)

        if quality != -1:
            targetFilenameTokens.append(f"q{quality}")

        if extraTokens:
            targetFilenameTokens.extend(extraTokens)

        targetFilename = '_'.join(targetFilenameTokens)

        self.__logger.debug(f"assembleTargetFileBasename(): Target filename: {targetFilename}")

        return targetFilename