Tika title encoding: transliterate title to ASCII, fall back to filename for empty or very short titles
This commit is contained in:
@@ -145,7 +145,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// Tika BODY
|
// Tika BODY (roher Plaintext)
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
try {
|
try {
|
||||||
$this->log('Parsing file', ['url' => $normalized]);
|
$this->log('Parsing file', ['url' => $normalized]);
|
||||||
@@ -210,19 +210,26 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// TITLE FALLBACK
|
// TITLE → ASCII SAFE (DELIBERATE DATA LOSS)
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
if (!$title) {
|
if ($title) {
|
||||||
$title = pathinfo($normalized, PATHINFO_FILENAME);
|
// UTF-8 → ASCII, Unbekanntes verwerfen
|
||||||
$title = str_replace(['_', '-'], ' ', $title);
|
$title = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $title);
|
||||||
|
|
||||||
|
// Normalisieren
|
||||||
|
$title = preg_replace('/\s+/', ' ', $title);
|
||||||
|
$title = trim($title);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 🔑 CRITICAL FIX: remove invalid UTF-8 bytes
|
// FALLBACK: Dateiname
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
$title = iconv('UTF-8', 'UTF-8//IGNORE', $title);
|
if (!$title || strlen($title) < 5) {
|
||||||
$title = preg_replace('/\s+/u', ' ', $title);
|
$title = pathinfo($normalized, PATHINFO_FILENAME);
|
||||||
$title = trim($title);
|
$title = str_replace(['_', '-'], ' ', $title);
|
||||||
|
$title = preg_replace('/\s+/', ' ', $title);
|
||||||
|
$title = trim($title);
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// Store result
|
// Store result
|
||||||
|
|||||||
Reference in New Issue
Block a user