Tika Title encoding
This commit is contained in:
@@ -55,8 +55,6 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
|
|
||||||
$db = Database::getInstance();
|
$db = Database::getInstance();
|
||||||
|
|
||||||
$db->query("SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci");
|
|
||||||
|
|
||||||
$sql = "SELECT * FROM tl_search_files ORDER BY tstamp ASC";
|
$sql = "SELECT * FROM tl_search_files ORDER BY tstamp ASC";
|
||||||
if ($limit !== null) {
|
if ($limit !== null) {
|
||||||
$sql .= " LIMIT " . (int) $limit;
|
$sql .= " LIMIT " . (int) $limit;
|
||||||
@@ -200,12 +198,10 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
?? null;
|
?? null;
|
||||||
|
|
||||||
if ($rawTitle) {
|
if ($rawTitle) {
|
||||||
$title = trim(
|
$title = html_entity_decode(
|
||||||
html_entity_decode(
|
$rawTitle,
|
||||||
$rawTitle,
|
ENT_QUOTES | ENT_HTML5,
|
||||||
ENT_QUOTES | ENT_HTML5,
|
'UTF-8'
|
||||||
'UTF-8'
|
|
||||||
)
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -214,15 +210,20 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// TITLE FALLBACK (REQUIRED)
|
// TITLE FALLBACK
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
if (!$title) {
|
if (!$title) {
|
||||||
$title = pathinfo($normalized, PATHINFO_FILENAME);
|
$title = pathinfo($normalized, PATHINFO_FILENAME);
|
||||||
$title = str_replace(['_', '-'], ' ', $title);
|
$title = str_replace(['_', '-'], ' ', $title);
|
||||||
$title = preg_replace('/\s+/u', ' ', $title);
|
|
||||||
$title = trim($title);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------
|
||||||
|
// 🔑 CRITICAL FIX: remove invalid UTF-8 bytes
|
||||||
|
// -------------------------------------------------
|
||||||
|
$title = iconv('UTF-8', 'UTF-8//IGNORE', $title);
|
||||||
|
$title = preg_replace('/\s+/u', ' ', $title);
|
||||||
|
$title = trim($title);
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// Store result
|
// Store result
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user