Tika Title encoding

This commit is contained in:
Jürgen Mummert
2026-01-10 12:30:20 +01:00
parent 38372539c2
commit 40792870bd
+13 -3
View File
@@ -77,7 +77,7 @@ class MeilisearchFilesParseCommand extends Command
$normalized = $originalUrl; $normalized = $originalUrl;
// ------------------------------------------------- // -------------------------------------------------
// Normalize URL → files/… // Normalize URL
// ------------------------------------------------- // -------------------------------------------------
if (str_contains($normalized, '?')) { if (str_contains($normalized, '?')) {
$parts = parse_url($normalized); $parts = parse_url($normalized);
@@ -173,7 +173,7 @@ class MeilisearchFilesParseCommand extends Command
} }
// ------------------------------------------------- // -------------------------------------------------
// Tika METADATA (Titel) // Tika METADATA (Title)
// ------------------------------------------------- // -------------------------------------------------
$title = null; $title = null;
@@ -208,7 +208,17 @@ class MeilisearchFilesParseCommand extends Command
} }
} catch (\Throwable) { } catch (\Throwable) {
// Titel ist optional // Metadata optional
}
// -------------------------------------------------
// TITLE FALLBACK (REQUIRED): if $title is still empty, derive it from the file name of the normalized URL
// -------------------------------------------------
if (!$title) {
$title = pathinfo($normalized, PATHINFO_FILENAME);
$title = str_replace(['_', '-'], ' ', $title);
$title = preg_replace('/\s+/u', ' ', $title);
$title = trim($title);
} }
// ------------------------------------------------- // -------------------------------------------------