Fix regex issues
This commit is contained in:
@@ -13,6 +13,7 @@
|
|||||||
- Shared CLI defaults for container/output tokens now live outside [`src/ffx/ffx_controller.py`](/home/osgw/.local/src/codex/ffx/src/ffx/ffx_controller.py), and a focused unit test locks in the lazy-import contract.
|
- Shared CLI defaults for container/output tokens now live outside [`src/ffx/ffx_controller.py`](/home/osgw/.local/src/codex/ffx/src/ffx/ffx_controller.py), and a focused unit test locks in the lazy-import contract.
|
||||||
- `FileProperties` now uses one cached `ffprobe -show_format -show_streams -of json` call per source file, and the combined payload was confirmed against the Dragonball asset to satisfy both previous probe call sites fully.
|
- `FileProperties` now uses one cached `ffprobe -show_format -show_streams -of json` call per source file, and the combined payload was confirmed against the Dragonball asset to satisfy both previous probe call sites fully.
|
||||||
- Database startup now bootstraps schema only when required tables are actually missing, while version enforcement still runs on ordinary DB-backed context creation.
|
- Database startup now bootstraps schema only when required tables are actually missing, while version enforcement still runs on ordinary DB-backed context creation.
|
||||||
|
- Helper filename and rich-text utilities now use compiled raw regexes plus translate-based filename filtering, with unit coverage for TMDB suffix rewriting and Rich color stripping.
|
||||||
- FFX logger setup now reuses named handlers, and fallback logger access no longer mutates handlers in ordinary constructors and helpers.
|
- FFX logger setup now reuses named handlers, and fallback logger access no longer mutates handlers in ordinary constructors and helpers.
|
||||||
- The process wrapper now uses `subprocess.run(...)` with centralized command formatting plus stable timeout and missing-command error mapping.
|
- The process wrapper now uses `subprocess.run(...)` with centralized command formatting plus stable timeout and missing-command error mapping.
|
||||||
- Active ORM controllers now use single-query accessors instead of paired `count()` plus `first()` lookups.
|
- Active ORM controllers now use single-query accessors instead of paired `count()` plus `first()` lookups.
|
||||||
@@ -95,15 +96,6 @@
|
|||||||
- Fewer surprises in production-like runs.
|
- Fewer surprises in production-like runs.
|
||||||
- Easier support for user-reported performance behavior.
|
- Easier support for user-reported performance behavior.
|
||||||
|
|
||||||
8. Regex and string utility cleanup
|
|
||||||
- [`src/ffx/helper.py`](/home/osgw/.local/src/codex/ffx/src/ffx/helper.py) still has repeated string-replacement churn in filename/TMDB normalization helpers, and regex handling in helpers is easy to regress quietly.
|
|
||||||
- Optimization:
|
|
||||||
- Keep regex literals raw and centralized where appropriate.
|
|
||||||
- Review filename and TMDB substitution helpers for repeated string churn.
|
|
||||||
- Expected value:
|
|
||||||
- Cleaner runtime output.
|
|
||||||
- Less warning noise during dry-run maintenance commands.
|
|
||||||
|
|
||||||
## Open
|
## Open
|
||||||
|
|
||||||
- Should optimization work focus first on operator-perceived latency, internal maintainability, or correctness-risk cleanup that also has performance upside?
|
- Should optimization work focus first on operator-perceived latency, internal maintainability, or correctness-risk cleanup that also has performance upside?
|
||||||
|
|||||||
@@ -16,7 +16,21 @@ DIFF_REMOVED_KEY = 'removed'
|
|||||||
DIFF_CHANGED_KEY = 'changed'
|
DIFF_CHANGED_KEY = 'changed'
|
||||||
DIFF_UNCHANGED_KEY = 'unchanged'
|
DIFF_UNCHANGED_KEY = 'unchanged'
|
||||||
|
|
||||||
RICH_COLOR_PATTERN = '\\[[a-z_]+\\](.+)\\[\\/[a-z_]+\\]'
|
FILENAME_FILTER_TRANSLATION = str.maketrans(
|
||||||
|
{
|
||||||
|
"/": "-",
|
||||||
|
":": ";",
|
||||||
|
"*": "",
|
||||||
|
"'": "",
|
||||||
|
"?": "#",
|
||||||
|
"♥": "",
|
||||||
|
"’": "",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
TMDB_FILLER_MARKERS = (" (*)", "(*)")
|
||||||
|
TMDB_EPISODE_RANGE_SUFFIX_REGEX = re.compile(r"\(([0-9]+)[-/]([0-9]+)\)$")
|
||||||
|
TMDB_EPISODE_PART_SUFFIX_REGEX = re.compile(r"\(([0-9]+)\)$")
|
||||||
|
RICH_COLOR_REGEX = re.compile(r"\[[a-z_]+\](.+)\[/[a-z_]+\]")
|
||||||
|
|
||||||
|
|
||||||
def dictDiff(a : dict, b : dict, ignoreKeys: list = [], removeKeys: list = []):
|
def dictDiff(a : dict, b : dict, ignoreKeys: list = [], removeKeys: list = []):
|
||||||
@@ -115,39 +129,35 @@ def filterFilename(fileName: str) -> str:
|
|||||||
"""This filter replaces characters from TMDB responses with characters
|
"""This filter replaces characters from TMDB responses with characters
|
||||||
less problematic when used in filenames, or removes them"""
|
less problematic when used in filenames, or removes them"""
|
||||||
|
|
||||||
fileName = str(fileName).replace('/', '-')
|
return str(fileName).translate(FILENAME_FILTER_TRANSLATION).strip()
|
||||||
fileName = str(fileName).replace(':', ';')
|
|
||||||
fileName = str(fileName).replace('*', '')
|
|
||||||
fileName = str(fileName).replace("'", '')
|
|
||||||
fileName = str(fileName).replace("?", '#')
|
|
||||||
fileName = str(fileName).replace('♥', '')
|
|
||||||
fileName = str(fileName).replace('’', '')
|
|
||||||
|
|
||||||
return fileName.strip()
|
|
||||||
|
|
||||||
def substituteTmdbFilename(fileName: str) -> str:
|
def substituteTmdbFilename(fileName: str) -> str:
|
||||||
"""If chaining this method with filterFilename, use this one first, as the latter will destroy some patterns"""
|
"""If chaining this method with filterFilename, use this one first, as the latter will destroy some patterns"""
|
||||||
|
|
||||||
# This indicates filler episodes in TMDB episode names
|
normalizedFileName = str(fileName)
|
||||||
fileName = str(fileName).replace(' (*)', '')
|
|
||||||
fileName = str(fileName).replace('(*)', '')
|
|
||||||
|
|
||||||
# This indicates the index of multi-episode files
|
for fillerMarker in TMDB_FILLER_MARKERS:
|
||||||
episodePartMatch = re.search("\\(([0-9]+)\\)$", fileName)
|
normalizedFileName = normalizedFileName.replace(fillerMarker, '')
|
||||||
|
|
||||||
|
episodeRangeMatch = TMDB_EPISODE_RANGE_SUFFIX_REGEX.search(normalizedFileName)
|
||||||
|
if episodeRangeMatch is not None:
|
||||||
|
partFirstIndex, partLastIndex = episodeRangeMatch.groups()
|
||||||
|
return TMDB_EPISODE_RANGE_SUFFIX_REGEX.sub(
|
||||||
|
f"Teil {partFirstIndex}-{partLastIndex}",
|
||||||
|
normalizedFileName,
|
||||||
|
count=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
episodePartMatch = TMDB_EPISODE_PART_SUFFIX_REGEX.search(normalizedFileName)
|
||||||
if episodePartMatch is not None:
|
if episodePartMatch is not None:
|
||||||
partSuffix = str(episodePartMatch.group(0))
|
partIndex = episodePartMatch.group(1)
|
||||||
partIndex = episodePartMatch.groups()[0]
|
return TMDB_EPISODE_PART_SUFFIX_REGEX.sub(
|
||||||
fileName = str(fileName).replace(partSuffix, f"Teil {partIndex}")
|
f"Teil {partIndex}",
|
||||||
|
normalizedFileName,
|
||||||
|
count=1,
|
||||||
|
)
|
||||||
|
|
||||||
# Also multi-episodes with first and last episode index
|
return normalizedFileName
|
||||||
episodePartMatch = re.search("\\(([0-9]+)[-\\/]([0-9]+)\\)$", fileName)
|
|
||||||
if episodePartMatch is not None:
|
|
||||||
partSuffix = str(episodePartMatch.group(0))
|
|
||||||
partFirstIndex = episodePartMatch.groups()[0]
|
|
||||||
partLastIndex = episodePartMatch.groups()[1]
|
|
||||||
fileName = str(fileName).replace(partSuffix, f"Teil {partFirstIndex}-{partLastIndex}")
|
|
||||||
|
|
||||||
return fileName
|
|
||||||
|
|
||||||
|
|
||||||
def getEpisodeFileBasename(showName,
|
def getEpisodeFileBasename(showName,
|
||||||
@@ -231,7 +241,7 @@ def formatRichColor(text: str, color: str = None):
|
|||||||
return f"[{color}]{text}[/{color}]"
|
return f"[{color}]{text}[/{color}]"
|
||||||
|
|
||||||
def removeRichColor(text: str):
|
def removeRichColor(text: str):
|
||||||
richColorMatch = re.search(RICH_COLOR_PATTERN, text)
|
richColorMatch = RICH_COLOR_REGEX.search(str(text))
|
||||||
if richColorMatch is None:
|
if richColorMatch is None:
|
||||||
return text
|
return text
|
||||||
else:
|
else:
|
||||||
|
|||||||
61
tests/unit/test_helper.py
Normal file
61
tests/unit/test_helper.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
SRC_ROOT = Path(__file__).resolve().parents[2] / "src"
|
||||||
|
|
||||||
|
if str(SRC_ROOT) not in sys.path:
|
||||||
|
sys.path.insert(0, str(SRC_ROOT))
|
||||||
|
|
||||||
|
|
||||||
|
from ffx.helper import ( # noqa: E402
|
||||||
|
filterFilename,
|
||||||
|
formatRichColor,
|
||||||
|
removeRichColor,
|
||||||
|
substituteTmdbFilename,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HelperTests(unittest.TestCase):
|
||||||
|
def test_filter_filename_replaces_and_removes_problem_characters(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"A-B;C#",
|
||||||
|
filterFilename(" A/B:C*'?♥’ "),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_substitute_tmdb_filename_removes_filler_marker(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"Episode Name",
|
||||||
|
substituteTmdbFilename("Episode Name (*)"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_substitute_tmdb_filename_rewrites_single_episode_suffix(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"Episode Name Teil 2",
|
||||||
|
substituteTmdbFilename("Episode Name (2)"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_substitute_tmdb_filename_rewrites_episode_range_suffix(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"Episode Name Teil 2-3",
|
||||||
|
substituteTmdbFilename("Episode Name (2/3)"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_remove_rich_color_returns_inner_text(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"value",
|
||||||
|
removeRichColor(formatRichColor("value", "green")),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_remove_rich_color_leaves_plain_text_unchanged(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"plain text",
|
||||||
|
removeRichColor("plain text"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Reference in New Issue
Block a user