From e5cce34619f24ed4188e72385447b5bb1cd0f26d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Mummert?= Date: Sat, 27 Dec 2025 22:41:36 +0100 Subject: [PATCH] Bugfix --- src/Service/MeilisearchImageHelper.php | 68 +++++----- src/Service/MeilisearchIndexService.php | 167 ++++++++++++++++-------- src/Service/OfficeIndexService.php | 88 +++++++------ src/Service/PdfIndexService.php | 101 +++++++------- 4 files changed, 239 insertions(+), 185 deletions(-) diff --git a/src/Service/MeilisearchImageHelper.php b/src/Service/MeilisearchImageHelper.php index bfaf05a..a33a546 100644 --- a/src/Service/MeilisearchImageHelper.php +++ b/src/Service/MeilisearchImageHelper.php @@ -20,72 +20,64 @@ class MeilisearchImageHelper */ public function resolveImagePath(?string $uuid): ?string { - error_log('--- MeiliImg START ---'); - if (!$uuid) { - error_log('[MeiliImg] UUID leer → return null'); return null; } - error_log('[MeiliImg] UUID = ' . $uuid); - // Contao-Framework initialisieren (CLI & Frontend) - $this->framework->initialize(); - error_log('[MeiliImg] Framework initialized'); - - /** @var FilesModel|null $file */ - $file = FilesModel::findByUuid($uuid); - - if (!$file) { - error_log('[MeiliImg] FilesModel::findByUuid() = NULL'); + try { + $this->framework->initialize(); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] ImageHelper: Framework init failed: ' . $e->getMessage()); return null; } - error_log('[MeiliImg] FilesModel gefunden'); - error_log('[MeiliImg] file->path = ' . $file->path); - error_log('[MeiliImg] file->uuid = ' . ($file->uuid ?? '(n/a)')); + /** @var FilesModel|null $file */ + try { + $file = FilesModel::findByUuid($uuid); + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] ImageHelper: FilesModel lookup failed (' . $uuid . '): ' . $e->getMessage() + ); + return null; + } + + if (!$file) { + error_log('[ContaoMeilisearch] ImageHelper: File not found for UUID ' . $uuid); + return null; + } // ImageSize aus tl_settings - $rawSize = Config::get('meilisearch_imagesize'); - $imageSizeId = (int) $rawSize; - - error_log('[MeiliImg] meilisearch_imagesize raw = ' . var_export($rawSize, true)); - error_log('[MeiliImg] meilisearch_imagesize int = ' . $imageSizeId); + $imageSizeId = (int) Config::get('meilisearch_imagesize'); // Fallback: Originaldatei if ($imageSizeId <= 0) { - error_log('[MeiliImg] imageSizeId <= 0 → FALLBACK file->path = ' . $file->path); - error_log('--- MeiliImg END ---'); return $file->path; } try { - $builder = $this->studio + $figure = $this->studio ->createFigureBuilder() ->from($file->path) - ->setSize($imageSizeId); - - error_log('[MeiliImg] FigureBuilder erstellt (from=' . $file->path . ', size=' . $imageSizeId . ')'); - - $figure = $builder->build(); - error_log('[MeiliImg] Figure build() OK'); + ->setSize($imageSizeId) + ->build(); $image = $figure->getImage(); if ($image === null) { - error_log('[MeiliImg] figure->getImage() = NULL'); + error_log( + '[ContaoMeilisearch] ImageHelper: Image generation failed for ' . $file->path + ); return null; } - $src = $image->getImageSrc(); - - error_log('[MeiliImg] image->getImageSrc() = ' . $src); - - return $src ?: null; + return $image->getImageSrc() ?: null; } catch (\Throwable $e) { - error_log('[MeiliImg] EXCEPTION ' . get_class($e) . ': ' . $e->getMessage()); - error_log('--- MeiliImg END ---'); + error_log( + '[ContaoMeilisearch] ImageHelper: Image processing failed for ' + . $file->path . ': ' . $e->getMessage() + ); return null; } } diff --git a/src/Service/MeilisearchIndexService.php b/src/Service/MeilisearchIndexService.php index f41d381..7818d0e 100644 --- a/src/Service/MeilisearchIndexService.php +++ b/src/Service/MeilisearchIndexService.php @@ -34,26 +34,48 @@ class MeilisearchIndexService */ public function run(): void { - $this->framework->initialize(); + try { + $this->framework->initialize(); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Framework initialization failed: ' . $e->getMessage()); + return; + } $host = (string) Config::get('meilisearch_host'); $apiKey = (string) Config::get('meilisearch_api_write'); $this->indexName = (string) Config::get('meilisearch_index'); if ($host === '' || $this->indexName === '') { - throw new \RuntimeException('Meilisearch is not configured in tl_settings.'); + error_log('[ContaoMeilisearch] Meilisearch is not configured in tl_settings.'); + return; } - $this->client = new Client($host, $apiKey); - $index = $this->client->index($this->indexName); + try { + $this->client = new Client($host, $apiKey); + $index = $this->client->index($this->indexName); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to connect to Meilisearch: ' . $e->getMessage()); + return; + } try { $index->updateSettings(['primaryKey' => 'id']); - } catch (\Throwable) {} + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to set primaryKey: ' . $e->getMessage()); + } - $this->ensureIndexSettings($index); + try { + $this->ensureIndexSettings($index); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to update index settings: ' . $e->getMessage()); + } - $index->deleteAllDocuments(); + try { + $index->deleteAllDocuments(); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to delete documents: ' . $e->getMessage()); + return; + } $this->indexTlSearch($index); $this->indexTlSearchPdf($index); @@ -78,7 +100,6 @@ class MeilisearchIndexService $text ); - // Text normalisieren $text = preg_replace('/\s{2,}/u', ' ', $text); $text = preg_replace('/\n{2,}/u', "\n", $text); @@ -121,7 +142,13 @@ class MeilisearchIndexService */ private function indexTlSearch(Indexes $index): void { - $rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search'); + try { + $rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search'); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to read tl_search: ' . $e->getMessage()); + return; + } + if (!$rows) { return; } @@ -132,46 +159,58 @@ class MeilisearchIndexService $documents = []; foreach ($rows as $row) { - $type = $this->detectTypeFromMeta($row['meta'] ?? null); + try { + $type = $this->detectTypeFromMeta($row['meta'] ?? null); - $eventStart = null; - if ($type === 'event') { - $eventStart = $this->extractEventStartDate($row['meta'] ?? null); - if (!$indexPastEvents && $eventStart !== null && $eventStart < $today) { - continue; + $eventStart = null; + if ($type === 'event') { + $eventStart = $this->extractEventStartDate($row['meta'] ?? null); + if (!$indexPastEvents && $eventStart !== null && $eventStart < $today) { + continue; + } } - } - $cleanText = $this->stripMeilisearchMeta((string) $row['text']); + $cleanText = $this->stripMeilisearchMeta((string) $row['text']); - $doc = [ - 'id' => $type . '_' . $row['id'], - 'type' => $type, - 'title' => $row['title'], - 'text' => $cleanText, - 'url' => $row['url'], - 'protected' => (bool) $row['protected'], - 'checksum' => $row['checksum'], - 'keywords' => (string) ($row['keywords'] ?? ''), - 'priority' => (int) ($row['priority'] ?? 0), - ]; + $doc = [ + 'id' => $type . '_' . $row['id'], + 'type' => $type, + 'title' => $row['title'], + 'text' => $cleanText, + 'url' => $row['url'], + 'protected' => (bool) $row['protected'], + 'checksum' => $row['checksum'], + 'keywords' => (string) ($row['keywords'] ?? ''), + 'priority' => (int) ($row['priority'] ?? 0), + ]; - if ($eventStart !== null) { - $doc['startDate'] = $eventStart; - } - - if (!empty($row['imagepath'])) { - $imagePath = $this->imageHelper->resolveImagePath($row['imagepath']); - if ($imagePath !== null) { - $doc['poster'] = $imagePath; + if ($eventStart !== null) { + $doc['startDate'] = $eventStart; } - } - $documents[] = $doc; + if (!empty($row['imagepath'])) { + $imagePath = $this->imageHelper->resolveImagePath($row['imagepath']); + if ($imagePath !== null) { + $doc['poster'] = $imagePath; + } + } + + $documents[] = $doc; + + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to build document for tl_search ID ' + . ($row['id'] ?? '?') . ': ' . $e->getMessage() + ); + } } if ($documents !== []) { - $index->addDocuments($documents); + try { + $index->addDocuments($documents); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to add tl_search documents: ' . $e->getMessage()); + } } } @@ -180,7 +219,13 @@ class MeilisearchIndexService */ private function indexTlSearchPdf(Indexes $index): void { - $rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search_pdf'); + try { + $rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search_pdf'); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to read tl_search_pdf: ' . $e->getMessage()); + return; + } + if (!$rows) { return; } @@ -188,23 +233,37 @@ class MeilisearchIndexService $documents = []; foreach ($rows as $row) { - $fileType = in_array($row['type'], ['pdf', 'docx', 'xlsx', 'pptx'], true) - ? $row['type'] - : 'pdf'; + try { + $fileType = in_array($row['type'], ['pdf', 'docx', 'xlsx', 'pptx'], true) + ? $row['type'] + : 'pdf'; - $documents[] = [ - 'id' => $fileType . '_' . $row['id'], - 'type' => $fileType, - 'title' => $row['title'], - 'text' => $this->stripMeilisearchMeta((string) $row['text']), - 'url' => $row['url'], - 'checksum' => $row['checksum'], - 'poster' => self::FILETYPE_ICON_MAP[$fileType] - ?? self::FILETYPE_ICON_MAP['pdf'], - ]; + $documents[] = [ + 'id' => $fileType . '_' . $row['id'], + 'type' => $fileType, + 'title' => $row['title'], + 'text' => $this->stripMeilisearchMeta((string) $row['text']), + 'url' => $row['url'], + 'checksum' => $row['checksum'], + 'poster' => self::FILETYPE_ICON_MAP[$fileType] + ?? self::FILETYPE_ICON_MAP['pdf'], + ]; + + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to build PDF document for ID ' + . ($row['id'] ?? '?') . ': ' . $e->getMessage() + ); + } } - $index->addDocuments($documents); + if ($documents !== []) { + try { + $index->addDocuments($documents); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] Failed to add tl_search_pdf documents: ' . $e->getMessage()); + } + } } private function detectTypeFromMeta(?string $meta): string diff --git a/src/Service/OfficeIndexService.php b/src/Service/OfficeIndexService.php index fc5dbb2..b8a802a 100644 --- a/src/Service/OfficeIndexService.php +++ b/src/Service/OfficeIndexService.php @@ -34,19 +34,15 @@ class OfficeIndexService } try { - error_log('bearbeite Office-Datei: ' . $url); - // innerhalb des Crawls gleiche URL nicht mehrfach parsen $seenKey = md5($url); if (isset($this->seenThisCrawl[$seenKey])) { - error_log('→ übersprungen: bereits im Crawl verarbeitet'); continue; } $this->seenThisCrawl[$seenKey] = true; $normalized = $this->normalizeOfficeUrl($url); if ($normalized === null) { - error_log('→ übersprungen: kein gültiger Office-Pfad'); continue; } @@ -54,7 +50,6 @@ class OfficeIndexService $absolutePath = $this->getAbsolutePath($relativePath); if (!is_file($absolutePath)) { - error_log('→ übersprungen: Datei existiert nicht: ' . $absolutePath); continue; } @@ -65,7 +60,6 @@ class OfficeIndexService $text = $this->parseOfficeFile($absolutePath, $type); if ($text === '') { - error_log('→ übersprungen: Office-Datei ohne Textinhalt'); continue; } @@ -78,10 +72,10 @@ class OfficeIndexService $type ); - error_log('geschrieben in tl_search_pdf'); - } catch (\Throwable $e) { - error_log('Office Service FEHLER: ' . $e->getMessage()); + error_log( + '[ContaoMeilisearch] Office indexing failed for "' . $url . '": ' . $e->getMessage() + ); } } } @@ -107,11 +101,7 @@ class OfficeIndexService parse_str($parts['query'], $query); if (!empty($query['p'])) { - $p = (string) $query['p']; - - // Query-Parameter korrekt dekodieren - $p = urldecode($p); - + $p = urldecode((string) $query['p']); $ext = strtolower(pathinfo($p, PATHINFO_EXTENSION)); if (in_array($ext, ['docx', 'xlsx', 'pptx'], true)) { @@ -136,29 +126,35 @@ class OfficeIndexService int $mtime, string $type ): void { - $db = Database::getInstance(); - - $db->prepare(' - INSERT INTO tl_search_pdf - (tstamp, type, url, title, text, checksum, file_mtime) - VALUES - (?, ?, ?, ?, ?, ?, ?) - ON DUPLICATE KEY UPDATE - tstamp=VALUES(tstamp), - type=VALUES(type), - url=VALUES(url), - title=VALUES(title), - text=VALUES(text), - file_mtime=VALUES(file_mtime) - ')->execute( - time(), - $type, - $url, - $title, - $text, - $checksum, - $mtime - ); + try { + Database::getInstance() + ->prepare(' + INSERT INTO tl_search_pdf + (tstamp, type, url, title, text, checksum, file_mtime) + VALUES + (?, ?, ?, ?, ?, ?, ?) + ON DUPLICATE KEY UPDATE + tstamp=VALUES(tstamp), + type=VALUES(type), + url=VALUES(url), + title=VALUES(title), + text=VALUES(text), + file_mtime=VALUES(file_mtime) + ') + ->execute( + time(), + $type, + $url, + $title, + $text, + $checksum, + $mtime + ); + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to write Office index entry (' . $url . '): ' . $e->getMessage() + ); + } } private function parseOfficeFile(string $absolutePath, string $type): string @@ -186,8 +182,10 @@ class OfficeIndexService } return $this->cleanText($text); - - } catch (\Throwable) { + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to parse DOCX "' . $absolutePath . '": ' . $e->getMessage() + ); return ''; } } @@ -205,8 +203,10 @@ class OfficeIndexService } return $this->cleanText($text); - - } catch (\Throwable) { + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to parse XLSX "' . $absolutePath . '": ' . $e->getMessage() + ); return ''; } } @@ -226,8 +226,10 @@ class OfficeIndexService } return $this->cleanText($text); - - } catch (\Throwable) { + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to parse PPTX "' . $absolutePath . '": ' . $e->getMessage() + ); return ''; } } diff --git a/src/Service/PdfIndexService.php b/src/Service/PdfIndexService.php index e29f467..f689c91 100644 --- a/src/Service/PdfIndexService.php +++ b/src/Service/PdfIndexService.php @@ -23,7 +23,6 @@ class PdfIndexService /** * Wird aus dem Listener beim ersten Hook-Call pro Crawl aufgerufen. - * MUSS IMMER laufen (auch wenn Checkbox später aus ist). */ public function resetTableOnce(): void { @@ -34,10 +33,11 @@ class PdfIndexService $this->didReset = true; $this->seenThisCrawl = []; - // bei <=100 PDFs: sauber & simpel - Database::getInstance()->execute('TRUNCATE tl_search_pdf'); - - error_log('PDF Reset: tl_search_pdf geleert (TRUNCATE)'); + try { + Database::getInstance()->execute('TRUNCATE tl_search_pdf'); + } catch (\Throwable $e) { + error_log('[ContaoMeilisearch] PDF reset failed: ' . $e->getMessage()); + } } /** @@ -54,25 +54,20 @@ class PdfIndexService } try { - error_log('bearbeite PDF: ' . $url); - - // innerhalb des Crawls gleiche URL nicht 20x parsen (News-Teaser etc.) + // innerhalb des Crawls gleiche URL nicht mehrfach parsen $seenKey = md5($url); if (isset($this->seenThisCrawl[$seenKey])) { - error_log('→ übersprungen: bereits im Crawl verarbeitet'); continue; } $this->seenThisCrawl[$seenKey] = true; $normalizedPath = $this->normalizePdfUrl($url); if ($normalizedPath === null) { - error_log('→ übersprungen: kein gültiger PDF-Pfad'); continue; } $absolutePath = $this->getAbsolutePath($normalizedPath); if (!is_file($absolutePath)) { - error_log('→ übersprungen: Datei existiert nicht: ' . $absolutePath); continue; } @@ -88,7 +83,6 @@ class PdfIndexService $text = $this->parsePdf($absolutePath); if ($text === '') { - error_log('→ übersprungen: PDF ohne Textinhalt'); continue; } @@ -100,10 +94,10 @@ class PdfIndexService $mtime ); - error_log('geschrieben in tl_search_pdf'); - } catch (\Throwable $e) { - error_log('PDF Service FEHLER: ' . $e->getMessage()); + error_log( + '[ContaoMeilisearch] PDF indexing failed for "' . $url . '": ' . $e->getMessage() + ); } } } @@ -118,8 +112,12 @@ class PdfIndexService $decoded = html_entity_decode($url); $parts = parse_url($decoded); - // Fall 2: absolute URL auf gleiche Site -> Pfad extrahieren - if (!empty($parts['path']) && str_starts_with($parts['path'], '/files/') && str_ends_with(strtolower($parts['path']), '.pdf')) { + // Fall 2: absolute URL auf gleiche Site + if ( + !empty($parts['path']) + && str_starts_with($parts['path'], '/files/') + && str_ends_with(strtolower($parts['path']), '.pdf') + ) { return $parts['path']; } @@ -131,13 +129,7 @@ class PdfIndexService parse_str($parts['query'], $query); if (!empty($query['p'])) { - $p = (string) $query['p']; - - // Query-Parameter korrekt dekodieren - $p = urldecode($p); - - // deine Links enthalten oft "pdf/DATEI.pdf" - // => wird zu "/files/pdf/DATEI.pdf" + $p = urldecode((string) $query['p']); return '/files/' . ltrim($p, '/'); } @@ -151,28 +143,33 @@ class PdfIndexService private function upsertPdf(string $url, string $title, string $text, string $checksum, int $mtime): void { - $db = Database::getInstance(); - - // wichtig: UNIQUE(checksum) -> entweder INSERT oder UPDATE - $db->prepare(' - INSERT INTO tl_search_pdf - (tstamp, url, title, text, checksum, file_mtime) - VALUES - (?, ?, ?, ?, ?, ?) - ON DUPLICATE KEY UPDATE - tstamp=VALUES(tstamp), - url=VALUES(url), - title=VALUES(title), - text=VALUES(text), - file_mtime=VALUES(file_mtime) - ')->execute( - time(), - $url, - $title, - $text, - $checksum, - $mtime - ); + try { + Database::getInstance() + ->prepare(' + INSERT INTO tl_search_pdf + (tstamp, url, title, text, checksum, file_mtime) + VALUES + (?, ?, ?, ?, ?, ?) + ON DUPLICATE KEY UPDATE + tstamp=VALUES(tstamp), + url=VALUES(url), + title=VALUES(title), + text=VALUES(text), + file_mtime=VALUES(file_mtime) + ') + ->execute( + time(), + $url, + $title, + $text, + $checksum, + $mtime + ); + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to write PDF index entry (' . $url . '): ' . $e->getMessage() + ); + } } private function parsePdf(string $absolutePath): string @@ -184,8 +181,10 @@ class PdfIndexService $text = $this->cleanPdfContent($pdf->getText()); return mb_substr($text, 0, 20000); - - } catch (\Throwable) { + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to parse PDF "' . $absolutePath . '": ' . $e->getMessage() + ); return ''; } } @@ -221,8 +220,10 @@ class PdfIndexService } } } - } catch (\Throwable) { - // ignore + } catch (\Throwable $e) { + error_log( + '[ContaoMeilisearch] Failed to read PDF metadata "' . $absolutePath . '": ' . $e->getMessage() + ); } return null;