Tika Title encoding
This commit is contained in:
@@ -74,6 +74,7 @@ class MeilisearchFilesParseCommand extends Command
|
||||
foreach ($files as $file) {
|
||||
|
||||
$originalUrl = (string) $file['url'];
|
||||
$existingTitle = trim((string) ($file['title'] ?? ''));
|
||||
$normalized = $originalUrl;
|
||||
|
||||
// -------------------------------------------------
|
||||
@@ -173,10 +174,14 @@ class MeilisearchFilesParseCommand extends Command
|
||||
}
|
||||
|
||||
// -------------------------------------------------
|
||||
// Tika METADATA (Title)
|
||||
// TITLE: keep existing editor-defined title
|
||||
// -------------------------------------------------
|
||||
$title = null;
|
||||
$title = $existingTitle !== '' ? $existingTitle : null;
|
||||
|
||||
// -------------------------------------------------
|
||||
// Tika METADATA (Title) – only if no existing title
|
||||
// -------------------------------------------------
|
||||
if ($title === null) {
|
||||
try {
|
||||
$metaResponse = $client->request(
|
||||
'PUT',
|
||||
@@ -208,21 +213,19 @@ class MeilisearchFilesParseCommand extends Command
|
||||
} catch (\Throwable) {
|
||||
// Metadata optional
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------
|
||||
// TITLE → ASCII SAFE (DELIBERATE DATA LOSS)
|
||||
// TITLE → ASCII SAFE (only if newly generated)
|
||||
// -------------------------------------------------
|
||||
if ($title) {
|
||||
// UTF-8 → ASCII, Unbekanntes verwerfen
|
||||
if ($existingTitle === '' && $title) {
|
||||
$title = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $title);
|
||||
|
||||
// Normalisieren
|
||||
$title = preg_replace('/\s+/', ' ', $title);
|
||||
$title = trim($title);
|
||||
}
|
||||
|
||||
// -------------------------------------------------
|
||||
// FALLBACK: Dateiname
|
||||
// FALLBACK: Dateiname (only if still empty)
|
||||
// -------------------------------------------------
|
||||
if (!$title || strlen($title) < 5) {
|
||||
$title = pathinfo($normalized, PATHINFO_FILENAME);
|
||||
|
||||
Reference in New Issue
Block a user