From 99ef883da57972ef7d818027395cc092f0d1eee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Mummert?= Date: Fri, 9 Jan 2026 22:01:16 +0100 Subject: [PATCH] new Twig --- .../MeilisearchFilesCleanupCommand.php | 34 ++-- src/EventListener/IndexPageListener.php | 149 +++++++++--------- src/Resources/config/services.yaml | 8 +- src/Resources/contao/dca/tl_search_files.php | 2 +- src/Resources/contao/dca/tl_settings.php | 77 ++++----- .../contao/languages/de/tl_settings.php | 10 +- .../mod_meilisearch_search.html.twig | 2 + 7 files changed, 138 insertions(+), 144 deletions(-) diff --git a/src/Command/MeilisearchFilesCleanupCommand.php b/src/Command/MeilisearchFilesCleanupCommand.php index d968c62..2b88cd0 100644 --- a/src/Command/MeilisearchFilesCleanupCommand.php +++ b/src/Command/MeilisearchFilesCleanupCommand.php @@ -3,7 +3,7 @@ namespace MummertMedia\ContaoMeilisearchBundle\Command; use Contao\CoreBundle\Framework\ContaoFramework; -use Doctrine\DBAL\Connection; +use Contao\Database; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputOption; @@ -13,7 +13,6 @@ class MeilisearchFilesCleanupCommand extends Command { public function __construct( private readonly ContaoFramework $framework, - private readonly Connection $connection, ) { parent::__construct(); } @@ -22,13 +21,13 @@ class MeilisearchFilesCleanupCommand extends Command { $this ->setName('meilisearch:files:cleanup') - ->setDescription('Remove stale indexed files from tl_search_files') + ->setDescription('Remove stale indexed files (PDF, DOCX, XLSX, PPTX) from tl_search_pdf') ->addOption( 'grace', null, InputOption::VALUE_OPTIONAL, 'Grace period in seconds (files newer than now-grace are kept)', - 86400 + 86400 // 24 Stunden ) ->addOption( 'dry-run', @@ -50,10 +49,10 @@ class MeilisearchFilesCleanupCommand extends Command $cutoff = time() - $grace; if ($dryRun) { - $count = $this->connection->fetchOne( - 'SELECT COUNT(*) FROM tl_search_files WHERE last_seen < ?', - [$cutoff] - ); + $count = Database::getInstance() + ->prepare('SELECT COUNT(*) AS cnt FROM tl_search_pdf WHERE last_seen < ?') + ->execute($cutoff) + ->cnt; $message = sprintf( '[DRY-RUN] %d stale file(s) would be removed (last_seen < %s)', @@ -64,14 +63,14 @@ class MeilisearchFilesCleanupCommand extends Command $output->writeln('' . $message . ''); $this->log($message); - $this->log('Cleaner stopped (dry-run)'); + $this->log('Cleaner successfully stopped'); return Command::SUCCESS; } - $affected = $this->connection->executeStatement( - 'DELETE FROM tl_search_files WHERE last_seen < ?', - [$cutoff] - ); + $affected = Database::getInstance() + ->prepare('DELETE FROM tl_search_pdf WHERE last_seen < ?') + ->execute($cutoff) + ->affectedRows; $message = sprintf( 'Removed %d stale file(s) (last_seen < %s)', @@ -93,8 +92,15 @@ class MeilisearchFilesCleanupCommand extends Command } } + /** + * Einheitliches Logging mit Zeitstempel + */ private function log(string $message): void { - error_log(sprintf('[%s] %s', date('Y-m-d H:i:s'), $message)); + error_log(sprintf( + '[%s] %s', + date('Y-m-d H:i:s'), + $message + )); } } \ No newline at end of file diff --git a/src/EventListener/IndexPageListener.php b/src/EventListener/IndexPageListener.php index 05954fe..a91e745 100644 --- a/src/EventListener/IndexPageListener.php +++ b/src/EventListener/IndexPageListener.php @@ -3,13 +3,15 @@ namespace MummertMedia\ContaoMeilisearchBundle\EventListener; use Contao\Config; -use Contao\System; +use MummertMedia\ContaoMeilisearchBundle\Service\PdfIndexService; +use MummertMedia\ContaoMeilisearchBundle\Service\OfficeIndexService; class IndexPageListener { - public function __construct() - { - } + public function __construct( + private readonly PdfIndexService $pdfIndexService, + private readonly OfficeIndexService $officeIndexService, + ) {} private function debug(string $message, array $context = []): void { @@ -101,6 +103,7 @@ class IndexPageListener $this->debug('Meta: searchimage candidate', ['searchimage' => $searchImage]); if (!empty($searchImage)) { + // >>> HINWEIS: falls dein tl_search-Feld "image" heißt, hier auf $set['image'] ändern! $set['imagepath'] = trim((string) $searchImage); } @@ -136,12 +139,20 @@ class IndexPageListener 'class' => $e::class, ]); } + + $this->debug('Meta: final set snapshot', [ + 'priority' => $set['priority'] ?? null, + 'keywords' => $set['keywords'] ?? null, + 'imagepath' => $set['imagepath'] ?? null, + 'startDate' => $set['startDate'] ?? null, + 'checksum' => $set['checksum'] ?? null, + ]); } } /* * ===================== - * DATEI-ERKENNUNG + UPSERT + * DATEI-INDEXIERUNG (PDF / OFFICE) * ===================== */ if ((int) ($data['protected'] ?? 0) !== 0) { @@ -149,13 +160,15 @@ class IndexPageListener return; } - $indexFiles = (bool) Config::get('meilisearch_index_files'); + $indexPdfs = (bool) Config::get('meilisearch_index_pdfs'); + $indexOffice = (bool) Config::get('meilisearch_index_office'); - $this->debug('File indexing setting', [ - 'meilisearch_index_files' => $indexFiles, + $this->debug('File indexing settings', [ + 'meilisearch_index_pdfs' => $indexPdfs, + 'meilisearch_index_office' => $indexOffice, ]); - if (!$indexFiles) { + if (!$indexPdfs && !$indexOffice) { $this->debug('Abort: file indexing disabled'); return; } @@ -163,85 +176,61 @@ class IndexPageListener $links = $this->findAllLinks($content); $this->debug('Links found', ['count' => count($links)]); - $fileLinks = []; + $pdfLinks = []; + $officeLinks = []; foreach ($links as $link) { $type = $this->detectIndexableFileType($link['url']); - if ($type !== null) { - $fileLinks[] = $link + ['type' => $type]; + + if ($type === 'pdf' && $indexPdfs) { + $pdfLinks[] = $link; + continue; + } + + if (in_array($type, ['docx', 'xlsx', 'pptx'], true) && $indexOffice) { + $officeLinks[] = $link; } } - $this->debug('Indexable file links found', [ - 'count' => count($fileLinks), - 'types' => array_count_values(array_column($fileLinks, 'type')), + $this->debug('Indexable file links', [ + 'pdf' => count($pdfLinks), + 'office' => count($officeLinks), ]); - if ($fileLinks) { - $db = System::getContainer()->get('database_connection'); - $time = time(); - - foreach ($fileLinks as $file) { - $url = strtok($file['url'], '#'); - - $path = parse_url($url, PHP_URL_PATH); - $abs = $path ? TL_ROOT . '/' . ltrim($path, '/') : null; - - $mtime = ($abs && is_file($abs)) ? filemtime($abs) : 0; - $checksum = md5($url . '|' . $mtime); - - $existing = $db->fetchAssociative( - 'SELECT id, checksum FROM tl_search_files WHERE url = ?', - [$url] - ); - - if ($existing) { - $db->update( - 'tl_search_files', - [ - 'tstamp' => $time, - 'last_seen' => $time, - 'page_id' => (int) ($data['pid'] ?? 0), - 'file_mtime' => $mtime, - 'checksum' => $checksum, - ], - ['id' => $existing['id']] - ); - - $this->debug('File updated', [ - 'url' => $url, - 'checksum' => $checksum, - ]); - } else { - $db->insert( - 'tl_search_files', - [ - 'tstamp' => $time, - 'last_seen' => $time, - 'type' => $file['type'], - 'url' => $url, - 'title' => $file['linkText'] ?? basename($url), - 'page_id' => (int) ($data['pid'] ?? 0), - 'file_mtime' => $mtime, - 'checksum' => $checksum, - ] - ); - - $this->debug('File inserted', [ - 'url' => $url, - 'checksum' => $checksum, - ]); - } + try { + if ($pdfLinks !== []) { + $this->debug('PDF handlePdfLinks(): call', ['count' => count($pdfLinks)]); + $this->pdfIndexService->handlePdfLinks($pdfLinks); + $this->debug('PDF handlePdfLinks(): ok'); } + + if ($officeLinks !== []) { + $this->debug('Office handleOfficeLinks(): call', ['count' => count($officeLinks)]); + $this->officeIndexService->handleOfficeLinks($officeLinks); + $this->debug('Office handleOfficeLinks(): ok'); + } + } catch (\Throwable $e) { + $this->debug('File indexing failed', [ + 'error' => $e->getMessage(), + 'class' => $e::class, + ]); } $this->debug('Hook end', [ 'final_set_keys' => array_keys($set), + 'final_set' => [ + 'priority' => $set['priority'] ?? null, + 'keywords' => $set['keywords'] ?? null, + 'imagepath' => $set['imagepath'] ?? null, + 'startDate' => $set['startDate'] ?? null, + 'checksum' => $set['checksum'] ?? null, + ], ]); } - /* === Hilfsmethoden unverändert === */ - + /** + * Extrahiert MEILISEARCH_JSON aus HTML-Kommentar + */ private function extractMeilisearchJson(string $content): ?array { if (!preg_match('//s', $content, $m)) { @@ -256,6 +245,9 @@ class IndexPageListener : null; } + /** + * Sammle alle Links + */ private function findAllLinks(string $content): array { if (!preg_match_all( @@ -278,8 +270,12 @@ class IndexPageListener return $result; } + /** + * Ermittelt indexierbaren Dateityp (pdf|docx|xlsx|pptx) oder null + */ private function detectIndexableFileType(string $url): ?string { + // Hash entfernen $url = strtok($url, '#'); $parts = parse_url($url); @@ -287,6 +283,7 @@ class IndexPageListener return null; } + // direkter Pfad (/files/…) if (!empty($parts['path'])) { $ext = strtolower(pathinfo($parts['path'], PATHINFO_EXTENSION)); if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) { @@ -294,12 +291,18 @@ class IndexPageListener } } + // Query-Parameter (Contao 4 + 5) if (!empty($parts['query'])) { parse_str($parts['query'], $query); foreach (['file', 'p', 'f'] as $param) { if (!empty($query[$param])) { - $candidate = rawurldecode(html_entity_decode((string) $query[$param], ENT_QUOTES)); + $candidate = (string) $query[$param]; + + // sicher decodieren (Contao 4 + 5) + $candidate = html_entity_decode($candidate, ENT_QUOTES); + $candidate = rawurldecode($candidate); + $ext = strtolower(pathinfo($candidate, PATHINFO_EXTENSION)); if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) { diff --git a/src/Resources/config/services.yaml b/src/Resources/config/services.yaml index 79ae9db..b4e4b9e 100644 --- a/src/Resources/config/services.yaml +++ b/src/Resources/config/services.yaml @@ -3,7 +3,7 @@ services: Psr\Container\ContainerInterface: '@service_container' MummertMedia\ContaoMeilisearchBundle\: - resource: '../../{Command,EventListener,Service}' + resource: '../../{Command,Cron,EventListener,Service}' autowire: true autoconfigure: true @@ -19,6 +19,12 @@ services: tags: - { name: contao.hook, hook: indexPage, method: onIndexPage } + MummertMedia\ContaoMeilisearchBundle\Cron\MeilisearchIndexCron: + autowire: true + autoconfigure: false + tags: + - { name: contao.cron, interval: daily, method: __invoke } + MummertMedia\ContaoMeilisearchBundle\Controller\FrontendModule\MeilisearchSearchController: autowire: true autoconfigure: false diff --git a/src/Resources/contao/dca/tl_search_files.php b/src/Resources/contao/dca/tl_search_files.php index 8fdc052..3fb7f1b 100644 --- a/src/Resources/contao/dca/tl_search_files.php +++ b/src/Resources/contao/dca/tl_search_files.php @@ -2,7 +2,7 @@ use Contao\DC_Table; -$GLOBALS['TL_DCA']['tl_search_files'] = [ +$GLOBALS['TL_DCA']['tl_search_pdf'] = [ 'config' => [ 'dataContainer' => DC_Table::class, 'sql' => [ diff --git a/src/Resources/contao/dca/tl_settings.php b/src/Resources/contao/dca/tl_settings.php index d284ef3..1bde23e 100644 --- a/src/Resources/contao/dca/tl_settings.php +++ b/src/Resources/contao/dca/tl_settings.php @@ -4,17 +4,14 @@ use Contao\CoreBundle\DataContainer\PaletteManipulator; use Contao\System; /** - * ------------------------------------------------- * Fields - * ------------------------------------------------- */ - $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_host'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'rgxp' => 'url', - 'tl_class' => 'w50', + 'rgxp' => 'url', + 'tl_class' => 'w50', ], ]; @@ -22,7 +19,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'tl_class' => 'w50', + 'tl_class' => 'w50', ], ]; @@ -30,7 +27,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_api_write'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'tl_class' => 'w50', + 'tl_class' => 'w50', 'hideInput' => true, ], ]; @@ -39,7 +36,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_api_search'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'tl_class' => 'w50', + 'tl_class' => 'w50', 'hideInput' => true, ], ]; @@ -58,71 +55,50 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_imagesize'] = [ return $options; }, 'eval' => [ - 'tl_class' => 'w50', - 'chosen' => true, + 'tl_class' => 'w50', + 'chosen' => true, 'includeBlankOption' => true, ], + // 🔥 DAS HAT GEFEHLT 'sql' => "int(10) unsigned NOT NULL default 0", ]; +$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [ + 'inputType' => 'checkbox', + 'eval' => [ + 'tl_class' => 'w50 clr', + ], +]; + $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_fallback_image'] = [ 'inputType' => 'fileTree', 'eval' => [ 'filesOnly' => true, 'fieldType' => 'radio', - 'tl_class' => 'w50', + 'tl_class' => 'w50', ], 'sql' => "varbinary(16) NULL", ]; -$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [ +$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_pdfs'] = [ + 'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'], 'inputType' => 'checkbox', - 'eval' => [ - 'tl_class' => 'w50 clr', + 'eval' => [ + 'tl_class' => 'w50', ], + 'sql' => "char(1) NOT NULL default '1'", ]; -/** - * ------------------------------------------------- - * Datei-Indexierung (Tika) - * ------------------------------------------------- - */ - -$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_files'] = [ +$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_office'] = [ + 'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office'], 'inputType' => 'checkbox', - 'eval' => [ - 'tl_class' => 'w50', - 'submitOnChange' => true, - ], - 'sql' => "char(1) NOT NULL default '0'", -]; - -$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_tika_url'] = [ - 'inputType' => 'text', - 'eval' => [ - 'rgxp' => 'url', - 'mandatory' => true, - 'tl_class' => 'w50 clr', - ], + 'eval' => ['tl_class' => 'w50'], + 'sql' => "char(1) NOT NULL default '0'", ]; /** - * ------------------------------------------------- - * Selector / Subpalette - * ------------------------------------------------- - */ - -$GLOBALS['TL_DCA']['tl_settings']['palettes']['__selector__'][] = 'meilisearch_index_files'; - -$GLOBALS['TL_DCA']['tl_settings']['subpalettes']['meilisearch_index_files'] - = 'meilisearch_tika_url'; - -/** - * ------------------------------------------------- * Palette - * ------------------------------------------------- */ - PaletteManipulator::create() ->addLegend('meilisearch_legend', null, PaletteManipulator::POSITION_AFTER, true) ->addField('meilisearch_host', 'meilisearch_legend') @@ -132,5 +108,6 @@ PaletteManipulator::create() ->addField('meilisearch_imagesize', 'meilisearch_legend') ->addField('meilisearch_fallback_image', 'meilisearch_legend') ->addField('meilisearch_index_past_events', 'meilisearch_legend') - ->addField('meilisearch_index_files', 'meilisearch_legend') + ->addField('meilisearch_index_pdfs', 'meilisearch_legend') + ->addField('meilisearch_index_office', 'meilisearch_legend') ->applyToPalette('default', 'tl_settings'); \ No newline at end of file diff --git a/src/Resources/contao/languages/de/tl_settings.php b/src/Resources/contao/languages/de/tl_settings.php index e5bdaf3..883f8cb 100644 --- a/src/Resources/contao/languages/de/tl_settings.php +++ b/src/Resources/contao/languages/de/tl_settings.php @@ -28,10 +28,10 @@ $GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][0] $GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][1] = 'Vergangene Kalender-Events werden ebenfalls in Meilisearch indexiert.'; -$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_files'] = [ - 'Dateien indexieren', - 'Aktiviert die Indexierung von PDF-Dateien sowie DOCX, XLSX und PPTX.', +$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'] = [ + 'PDFs indexieren', + 'Aktiviert die Indexierung von PDF-Dateien für die Suche.', ]; -$GLOBALS['TL_LANG']['tl_settings']['meilisearch_tika_url'] - = ['Apache Tika URL', 'URL der Apache Tika Instanz (z. B. https://tika.domain.tld).']; \ No newline at end of file +$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office'] + = ['Office-Dateien indexieren', 'DOCX, XLSX und PPTX in die Suche aufnehmen.']; \ No newline at end of file diff --git a/src/Resources/contao/templates/frontend_module/mod_meilisearch_search.html.twig b/src/Resources/contao/templates/frontend_module/mod_meilisearch_search.html.twig index 5994299..68343fe 100644 --- a/src/Resources/contao/templates/frontend_module/mod_meilisearch_search.html.twig +++ b/src/Resources/contao/templates/frontend_module/mod_meilisearch_search.html.twig @@ -4,6 +4,7 @@ Contao 5 – Frontend Module Template #} +{% block meilisearch %}