change IndexPage

This commit is contained in:
Jürgen Mummert
2026-01-09 12:07:09 +01:00
parent 8b22467799
commit 17ecdaec17
+17 -54
View File
@@ -3,15 +3,12 @@
namespace MummertMedia\ContaoMeilisearchBundle\EventListener; namespace MummertMedia\ContaoMeilisearchBundle\EventListener;
use Contao\Config; use Contao\Config;
use MummertMedia\ContaoMeilisearchBundle\Service\PdfIndexService;
use MummertMedia\ContaoMeilisearchBundle\Service\OfficeIndexService;
class IndexPageListener class IndexPageListener
{ {
public function __construct( public function __construct()
private readonly PdfIndexService $pdfIndexService, {
private readonly OfficeIndexService $officeIndexService, }
) {}
private function debug(string $message, array $context = []): void private function debug(string $message, array $context = []): void
{ {
@@ -103,7 +100,6 @@ class IndexPageListener
$this->debug('Meta: searchimage candidate', ['searchimage' => $searchImage]); $this->debug('Meta: searchimage candidate', ['searchimage' => $searchImage]);
if (!empty($searchImage)) { if (!empty($searchImage)) {
// >>> HINWEIS: falls dein tl_search-Feld "image" heißt, hier auf $set['image'] ändern!
$set['imagepath'] = trim((string) $searchImage); $set['imagepath'] = trim((string) $searchImage);
} }
@@ -152,7 +148,7 @@ class IndexPageListener
/* /*
* ===================== * =====================
* DATEI-INDEXIERUNG (PDF / OFFICE) * DATEI-ERKENNUNG (PDF / OFFICE via Tika)
* ===================== * =====================
*/ */
if ((int) ($data['protected'] ?? 0) !== 0) { if ((int) ($data['protected'] ?? 0) !== 0) {
@@ -160,15 +156,13 @@ class IndexPageListener
return; return;
} }
$indexPdfs = (bool) Config::get('meilisearch_index_pdfs'); $indexFiles = (bool) Config::get('meilisearch_index_files');
$indexOffice = (bool) Config::get('meilisearch_index_office');
$this->debug('File indexing settings', [ $this->debug('File indexing setting', [
'meilisearch_index_pdfs' => $indexPdfs, 'meilisearch_index_files' => $indexFiles,
'meilisearch_index_office' => $indexOffice,
]); ]);
if (!$indexPdfs && !$indexOffice) { if (!$indexFiles) {
$this->debug('Abort: file indexing disabled'); $this->debug('Abort: file indexing disabled');
return; return;
} }
@@ -176,45 +170,22 @@ class IndexPageListener
$links = $this->findAllLinks($content); $links = $this->findAllLinks($content);
$this->debug('Links found', ['count' => count($links)]); $this->debug('Links found', ['count' => count($links)]);
$pdfLinks = []; $fileLinks = [];
$officeLinks = [];
foreach ($links as $link) { foreach ($links as $link) {
$type = $this->detectIndexableFileType($link['url']); $type = $this->detectIndexableFileType($link['url']);
if ($type !== null) {
if ($type === 'pdf' && $indexPdfs) { $fileLinks[] = $link + ['type' => $type];
$pdfLinks[] = $link;
continue;
}
if (in_array($type, ['docx', 'xlsx', 'pptx'], true) && $indexOffice) {
$officeLinks[] = $link;
} }
} }
$this->debug('Indexable file links', [ $this->debug('Indexable file links found', [
'pdf' => count($pdfLinks), 'count' => count($fileLinks),
'office' => count($officeLinks), 'types' => array_count_values(array_column($fileLinks, 'type')),
]); ]);
try { // ❗ ABSICHTLICH: hier passiert NOCH NICHTS
if ($pdfLinks !== []) { // Verarbeitung der Dateien folgt im nächsten Schritt
$this->debug('PDF handlePdfLinks(): call', ['count' => count($pdfLinks)]);
$this->pdfIndexService->handlePdfLinks($pdfLinks);
$this->debug('PDF handlePdfLinks(): ok');
}
if ($officeLinks !== []) {
$this->debug('Office handleOfficeLinks(): call', ['count' => count($officeLinks)]);
$this->officeIndexService->handleOfficeLinks($officeLinks);
$this->debug('Office handleOfficeLinks(): ok');
}
} catch (\Throwable $e) {
$this->debug('File indexing failed', [
'error' => $e->getMessage(),
'class' => $e::class,
]);
}
$this->debug('Hook end', [ $this->debug('Hook end', [
'final_set_keys' => array_keys($set), 'final_set_keys' => array_keys($set),
@@ -275,7 +246,6 @@ class IndexPageListener
*/ */
private function detectIndexableFileType(string $url): ?string private function detectIndexableFileType(string $url): ?string
{ {
// Hash entfernen
$url = strtok($url, '#'); $url = strtok($url, '#');
$parts = parse_url($url); $parts = parse_url($url);
@@ -283,7 +253,6 @@ class IndexPageListener
return null; return null;
} }
// direkter Pfad (/files/…)
if (!empty($parts['path'])) { if (!empty($parts['path'])) {
$ext = strtolower(pathinfo($parts['path'], PATHINFO_EXTENSION)); $ext = strtolower(pathinfo($parts['path'], PATHINFO_EXTENSION));
if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) { if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) {
@@ -291,18 +260,12 @@ class IndexPageListener
} }
} }
// Query-Parameter (Contao 4 + 5)
if (!empty($parts['query'])) { if (!empty($parts['query'])) {
parse_str($parts['query'], $query); parse_str($parts['query'], $query);
foreach (['file', 'p', 'f'] as $param) { foreach (['file', 'p', 'f'] as $param) {
if (!empty($query[$param])) { if (!empty($query[$param])) {
$candidate = (string) $query[$param]; $candidate = rawurldecode(html_entity_decode((string) $query[$param], ENT_QUOTES));
// sicher decodieren (Contao 4 + 5)
$candidate = html_entity_decode($candidate, ENT_QUOTES);
$candidate = rawurldecode($candidate);
$ext = strtolower(pathinfo($candidate, PATHINFO_EXTENSION)); $ext = strtolower(pathinfo($candidate, PATHINFO_EXTENSION));
if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) { if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) {