Tika Title encoding

This commit is contained in:
Jürgen Mummert
2026-01-10 19:06:58 +01:00
parent 2d3ddac945
commit 86b81affdc
+11 -8
View File
@@ -74,6 +74,7 @@ class MeilisearchFilesParseCommand extends Command
foreach ($files as $file) { foreach ($files as $file) {
$originalUrl = (string) $file['url']; $originalUrl = (string) $file['url'];
$existingTitle = trim((string) ($file['title'] ?? ''));
$normalized = $originalUrl; $normalized = $originalUrl;
// ------------------------------------------------- // -------------------------------------------------
@@ -173,10 +174,14 @@ class MeilisearchFilesParseCommand extends Command
} }
// ------------------------------------------------- // -------------------------------------------------
// Tika METADATA (Title) // TITLE: keep existing editor-defined title
// ------------------------------------------------- // -------------------------------------------------
$title = null; $title = $existingTitle !== '' ? $existingTitle : null;
// -------------------------------------------------
// Tika METADATA (Title) only if no existing title
// -------------------------------------------------
if ($title === null) {
try { try {
$metaResponse = $client->request( $metaResponse = $client->request(
'PUT', 'PUT',
@@ -208,21 +213,19 @@ class MeilisearchFilesParseCommand extends Command
} catch (\Throwable) { } catch (\Throwable) {
// Metadata optional // Metadata optional
} }
}
// ------------------------------------------------- // -------------------------------------------------
// TITLE → ASCII SAFE (DELIBERATE DATA LOSS) // TITLE → ASCII SAFE (only if newly generated)
// ------------------------------------------------- // -------------------------------------------------
if ($title) { if ($existingTitle === '' && $title) {
// UTF-8 → ASCII, Unbekanntes verwerfen
$title = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $title); $title = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $title);
// Normalisieren
$title = preg_replace('/\s+/', ' ', $title); $title = preg_replace('/\s+/', ' ', $title);
$title = trim($title); $title = trim($title);
} }
// ------------------------------------------------- // -------------------------------------------------
// FALLBACK: Dateiname // FALLBACK: Dateiname (only if still empty)
// ------------------------------------------------- // -------------------------------------------------
if (!$title || strlen($title) < 5) { if (!$title || strlen($title) < 5) {
$title = pathinfo($normalized, PATHINFO_FILENAME); $title = pathinfo($normalized, PATHINFO_FILENAME);