Tika Title encoding
This commit is contained in:
@@ -74,6 +74,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
foreach ($files as $file) {
|
foreach ($files as $file) {
|
||||||
|
|
||||||
$originalUrl = (string) $file['url'];
|
$originalUrl = (string) $file['url'];
|
||||||
|
$existingTitle = trim((string) ($file['title'] ?? ''));
|
||||||
$normalized = $originalUrl;
|
$normalized = $originalUrl;
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
@@ -173,10 +174,14 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// Tika METADATA (Title)
|
// TITLE: keep existing editor-defined title
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
$title = null;
|
$title = $existingTitle !== '' ? $existingTitle : null;
|
||||||
|
|
||||||
|
// -------------------------------------------------
|
||||||
|
// Tika METADATA (Title) – only if no existing title
|
||||||
|
// -------------------------------------------------
|
||||||
|
if ($title === null) {
|
||||||
try {
|
try {
|
||||||
$metaResponse = $client->request(
|
$metaResponse = $client->request(
|
||||||
'PUT',
|
'PUT',
|
||||||
@@ -208,21 +213,19 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
} catch (\Throwable) {
|
} catch (\Throwable) {
|
||||||
// Metadata optional
|
// Metadata optional
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// TITLE → ASCII SAFE (DELIBERATE DATA LOSS)
|
// TITLE → ASCII SAFE (only if newly generated)
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
if ($title) {
|
if ($existingTitle === '' && $title) {
|
||||||
// UTF-8 → ASCII, Unbekanntes verwerfen
|
|
||||||
$title = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $title);
|
$title = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $title);
|
||||||
|
|
||||||
// Normalisieren
|
|
||||||
$title = preg_replace('/\s+/', ' ', $title);
|
$title = preg_replace('/\s+/', ' ', $title);
|
||||||
$title = trim($title);
|
$title = trim($title);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// FALLBACK: Dateiname
|
// FALLBACK: Dateiname (only if still empty)
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
if (!$title || strlen($title) < 5) {
|
if (!$title || strlen($title) < 5) {
|
||||||
$title = pathinfo($normalized, PATHINFO_FILENAME);
|
$title = pathinfo($normalized, PATHINFO_FILENAME);
|
||||||
|
|||||||
Reference in New Issue
Block a user