Fix regex issues

This commit is contained in:
Javanaut
2026-04-11 16:10:41 +02:00
parent fc729a2414
commit 358ef18f77
3 changed files with 100 additions and 37 deletions

View File

@@ -13,6 +13,7 @@
- Shared CLI defaults for container/output tokens now live outside [`src/ffx/ffx_controller.py`](/home/osgw/.local/src/codex/ffx/src/ffx/ffx_controller.py), and a focused unit test locks in the lazy-import contract. - Shared CLI defaults for container/output tokens now live outside [`src/ffx/ffx_controller.py`](/home/osgw/.local/src/codex/ffx/src/ffx/ffx_controller.py), and a focused unit test locks in the lazy-import contract.
- `FileProperties` now uses one cached `ffprobe -show_format -show_streams -of json` call per source file, and the combined payload was confirmed against the Dragonball asset to satisfy both previous probe call sites fully. - `FileProperties` now uses one cached `ffprobe -show_format -show_streams -of json` call per source file, and the combined payload was confirmed against the Dragonball asset to satisfy both previous probe call sites fully.
- Database startup now bootstraps schema only when required tables are actually missing, while version enforcement still runs on ordinary DB-backed context creation. - Database startup now bootstraps schema only when required tables are actually missing, while version enforcement still runs on ordinary DB-backed context creation.
- Helper filename and rich-text utilities now use compiled raw regexes plus translate-based filename filtering, with unit coverage for TMDB suffix rewriting and Rich color stripping.
- FFX logger setup now reuses named handlers, and fallback logger access no longer mutates handlers in ordinary constructors and helpers. - FFX logger setup now reuses named handlers, and fallback logger access no longer mutates handlers in ordinary constructors and helpers.
- The process wrapper now uses `subprocess.run(...)` with centralized command formatting plus stable timeout and missing-command error mapping. - The process wrapper now uses `subprocess.run(...)` with centralized command formatting plus stable timeout and missing-command error mapping.
- Active ORM controllers now use single-query accessors instead of paired `count()` plus `first()` lookups. - Active ORM controllers now use single-query accessors instead of paired `count()` plus `first()` lookups.
@@ -95,15 +96,6 @@
- Fewer surprises in production-like runs. - Fewer surprises in production-like runs.
- Easier support for user-reported performance behavior. - Easier support for user-reported performance behavior.
8. Regex and string utility cleanup
- [`src/ffx/helper.py`](/home/osgw/.local/src/codex/ffx/src/ffx/helper.py) still has repeated string-replacement churn in filename/TMDB normalization helpers, and regex handling in helpers is easy to regress quietly.
- Optimization:
- Keep regex literals raw and centralized where appropriate.
- Review filename and TMDB substitution helpers for repeated string churn.
- Expected value:
- Cleaner runtime output.
- Less warning noise during dry-run maintenance commands.
## Open ## Open
- Should optimization work focus first on operator-perceived latency, internal maintainability, or correctness-risk cleanup that also has performance upside? - Should optimization work focus first on operator-perceived latency, internal maintainability, or correctness-risk cleanup that also has performance upside?

View File

@@ -16,7 +16,21 @@ DIFF_REMOVED_KEY = 'removed'
DIFF_CHANGED_KEY = 'changed' DIFF_CHANGED_KEY = 'changed'
DIFF_UNCHANGED_KEY = 'unchanged' DIFF_UNCHANGED_KEY = 'unchanged'
RICH_COLOR_PATTERN = '\\[[a-z_]+\\](.+)\\[\\/[a-z_]+\\]' FILENAME_FILTER_TRANSLATION = str.maketrans(
{
"/": "-",
":": ";",
"*": "",
"'": "",
"?": "#",
"♥": "",
"’": "",
}
)
TMDB_FILLER_MARKERS = (" (*)", "(*)")
TMDB_EPISODE_RANGE_SUFFIX_REGEX = re.compile(r"\(([0-9]+)[-/]([0-9]+)\)$")
TMDB_EPISODE_PART_SUFFIX_REGEX = re.compile(r"\(([0-9]+)\)$")
RICH_COLOR_REGEX = re.compile(r"\[[a-z_]+\](.+)\[/[a-z_]+\]")
def dictDiff(a : dict, b : dict, ignoreKeys: list = [], removeKeys: list = []): def dictDiff(a : dict, b : dict, ignoreKeys: list = [], removeKeys: list = []):
@@ -115,39 +129,35 @@ def filterFilename(fileName: str) -> str:
"""This filter replaces characters from TMDB responses with characters """This filter replaces characters from TMDB responses with characters
less problematic when used in filenames or removes them""" less problematic when used in filenames or removes them"""
fileName = str(fileName).replace('/', '-') return str(fileName).translate(FILENAME_FILTER_TRANSLATION).strip()
fileName = str(fileName).replace(':', ';')
fileName = str(fileName).replace('*', '')
fileName = str(fileName).replace("'", '')
fileName = str(fileName).replace("?", '#')
fileName = str(fileName).replace('♥', '')
fileName = str(fileName).replace('’', '')
return fileName.strip()
def substituteTmdbFilename(fileName: str) -> str: def substituteTmdbFilename(fileName: str) -> str:
"""If chaining this method with filterFilename use this one first as the latter will destroy some patterns""" """If chaining this method with filterFilename use this one first as the latter will destroy some patterns"""
# This indicates filler episodes in TMDB episode names normalizedFileName = str(fileName)
fileName = str(fileName).replace(' (*)', '')
fileName = str(fileName).replace('(*)', '')
# This indicates the index of multi-episode files for fillerMarker in TMDB_FILLER_MARKERS:
episodePartMatch = re.search("\\(([0-9]+)\\)$", fileName) normalizedFileName = normalizedFileName.replace(fillerMarker, '')
episodeRangeMatch = TMDB_EPISODE_RANGE_SUFFIX_REGEX.search(normalizedFileName)
if episodeRangeMatch is not None:
partFirstIndex, partLastIndex = episodeRangeMatch.groups()
return TMDB_EPISODE_RANGE_SUFFIX_REGEX.sub(
f"Teil {partFirstIndex}-{partLastIndex}",
normalizedFileName,
count=1,
)
episodePartMatch = TMDB_EPISODE_PART_SUFFIX_REGEX.search(normalizedFileName)
if episodePartMatch is not None: if episodePartMatch is not None:
partSuffix = str(episodePartMatch.group(0)) partIndex = episodePartMatch.group(1)
partIndex = episodePartMatch.groups()[0] return TMDB_EPISODE_PART_SUFFIX_REGEX.sub(
fileName = str(fileName).replace(partSuffix, f"Teil {partIndex}") f"Teil {partIndex}",
normalizedFileName,
count=1,
)
# Also multi-episodes with first and last episode index return normalizedFileName
episodePartMatch = re.search("\\(([0-9]+)[-\\/]([0-9]+)\\)$", fileName)
if episodePartMatch is not None:
partSuffix = str(episodePartMatch.group(0))
partFirstIndex = episodePartMatch.groups()[0]
partLastIndex = episodePartMatch.groups()[1]
fileName = str(fileName).replace(partSuffix, f"Teil {partFirstIndex}-{partLastIndex}")
return fileName
def getEpisodeFileBasename(showName, def getEpisodeFileBasename(showName,
@@ -231,7 +241,7 @@ def formatRichColor(text: str, color: str = None):
return f"[{color}]{text}[/{color}]" return f"[{color}]{text}[/{color}]"
def removeRichColor(text: str): def removeRichColor(text: str):
richColorMatch = re.search(RICH_COLOR_PATTERN, text) richColorMatch = RICH_COLOR_REGEX.search(str(text))
if richColorMatch is None: if richColorMatch is None:
return text return text
else: else:

61
tests/unit/test_helper.py Normal file
View File

@@ -0,0 +1,61 @@
from __future__ import annotations
from pathlib import Path
import sys
import unittest
SRC_ROOT = Path(__file__).resolve().parents[2] / "src"
if str(SRC_ROOT) not in sys.path:
sys.path.insert(0, str(SRC_ROOT))
from ffx.helper import ( # noqa: E402
filterFilename,
formatRichColor,
removeRichColor,
substituteTmdbFilename,
)
class HelperTests(unittest.TestCase):
    """Unit tests for the filename and Rich-markup helpers in ``ffx.helper``."""

    def test_filter_filename_replaces_and_removes_problem_characters(self):
        # One input exercises every substitution at once; the surrounding
        # blanks check that the result is stripped as well.
        raw_name = " A/B:C*'?♥’ "
        expected = "A-B;C#"
        self.assertEqual(expected, filterFilename(raw_name))

    def test_substitute_tmdb_filename_removes_filler_marker(self):
        # The " (*)" marker is dropped together with its leading space.
        expected = "Episode Name"
        actual = substituteTmdbFilename("Episode Name (*)")
        self.assertEqual(expected, actual)

    def test_substitute_tmdb_filename_rewrites_single_episode_suffix(self):
        # A trailing "(N)" becomes "Teil N".
        expected = "Episode Name Teil 2"
        actual = substituteTmdbFilename("Episode Name (2)")
        self.assertEqual(expected, actual)

    def test_substitute_tmdb_filename_rewrites_episode_range_suffix(self):
        # A trailing "(N/M)" becomes "Teil N-M".
        expected = "Episode Name Teil 2-3"
        actual = substituteTmdbFilename("Episode Name (2/3)")
        self.assertEqual(expected, actual)

    def test_remove_rich_color_returns_inner_text(self):
        # Round trip: wrap the value in colour markup, then strip it again.
        colored = formatRichColor("value", "green")
        self.assertEqual("value", removeRichColor(colored))

    def test_remove_rich_color_leaves_plain_text_unchanged(self):
        # Text without any markup must be returned as-is.
        plain = "plain text"
        self.assertEqual("plain text", removeRichColor(plain))


if __name__ == "__main__":
    unittest.main()