Tika Title encoding

This commit is contained in:
Jürgen Mummert
2026-01-10 12:30:20 +01:00
parent 38372539c2
commit 40792870bd
+13 -3
View File
@@ -77,7 +77,7 @@ class MeilisearchFilesParseCommand extends Command
$normalized = $originalUrl;
// -------------------------------------------------
// Normalize URL → files/…
// Normalize URL
// -------------------------------------------------
if (str_contains($normalized, '?')) {
$parts = parse_url($normalized);
@@ -173,7 +173,7 @@ class MeilisearchFilesParseCommand extends Command
}
// -------------------------------------------------
// Tika METADATA (Titel)
// Tika METADATA (Title)
// -------------------------------------------------
$title = null;
@@ -208,7 +208,17 @@ class MeilisearchFilesParseCommand extends Command
}
} catch (\Throwable) {
// Titel ist optional
// Metadata optional
}
// -------------------------------------------------
// TITLE FALLBACK (REQUIRED)
// -------------------------------------------------
if (!$title) {
    // $title is still empty at this point, so derive a readable one from
    // the file name (extension stripped) of the normalized URL.
    $title = pathinfo($normalized, PATHINFO_FILENAME);

    // Treat underscores and hyphens as word separators.
    $title = str_replace(['_', '-'], ' ', $title);

    // preg_replace() returns null on a PCRE error (e.g. the /u modifier
    // rejecting a file name that is not valid UTF-8). Keep the un-collapsed
    // string in that case instead of handing null to trim(), which would
    // lose the fallback title and raise a deprecation / TypeError.
    $title = preg_replace('/\s+/u', ' ', $title) ?? $title;
    $title = trim($title);

    // NOTE(review): if $normalized can still contain percent-encoded
    // characters (e.g. "%20"), they end up verbatim in the title; confirm
    // whether the URL is decoded earlier in this method.
}
// -------------------------------------------------