diff --git a/src/Command/MeilisearchFilesCleanupCommand.php b/src/Command/MeilisearchFilesCleanupCommand.php index 2b88cd0..d968c62 100644 --- a/src/Command/MeilisearchFilesCleanupCommand.php +++ b/src/Command/MeilisearchFilesCleanupCommand.php @@ -3,7 +3,7 @@ namespace MummertMedia\ContaoMeilisearchBundle\Command; use Contao\CoreBundle\Framework\ContaoFramework; -use Contao\Database; +use Doctrine\DBAL\Connection; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputOption; @@ -13,6 +13,7 @@ class MeilisearchFilesCleanupCommand extends Command { public function __construct( private readonly ContaoFramework $framework, + private readonly Connection $connection, ) { parent::__construct(); } @@ -21,13 +22,13 @@ class MeilisearchFilesCleanupCommand extends Command { $this ->setName('meilisearch:files:cleanup') - ->setDescription('Remove stale indexed files (PDF, DOCX, XLSX, PPTX) from tl_search_pdf') + ->setDescription('Remove stale indexed files from tl_search_files') ->addOption( 'grace', null, InputOption::VALUE_OPTIONAL, 'Grace period in seconds (files newer than now-grace are kept)', - 86400 // 24 Stunden + 86400 ) ->addOption( 'dry-run', @@ -49,10 +50,10 @@ class MeilisearchFilesCleanupCommand extends Command $cutoff = time() - $grace; if ($dryRun) { - $count = Database::getInstance() - ->prepare('SELECT COUNT(*) AS cnt FROM tl_search_pdf WHERE last_seen < ?') - ->execute($cutoff) - ->cnt; + $count = $this->connection->fetchOne( + 'SELECT COUNT(*) FROM tl_search_files WHERE last_seen < ?', + [$cutoff] + ); $message = sprintf( '[DRY-RUN] %d stale file(s) would be removed (last_seen < %s)', @@ -63,14 +64,14 @@ class MeilisearchFilesCleanupCommand extends Command $output->writeln('' . $message . ''); $this->log($message); - $this->log('Cleaner successfully stopped'); + $this->log('Cleaner stopped (dry-run)'); return Command::SUCCESS; } - $affected = Database::getInstance() - ->prepare('DELETE FROM tl_search_pdf WHERE last_seen < ?') - ->execute($cutoff) - ->affectedRows; + $affected = $this->connection->executeStatement( + 'DELETE FROM tl_search_files WHERE last_seen < ?', + [$cutoff] + ); $message = sprintf( 'Removed %d stale file(s) (last_seen < %s)', @@ -92,15 +93,8 @@ class MeilisearchFilesCleanupCommand extends Command } } - /** - * Einheitliches Logging mit Zeitstempel - */ private function log(string $message): void { - error_log(sprintf( - '[%s] %s', - date('Y-m-d H:i:s'), - $message - )); + error_log(sprintf('[%s] %s', date('Y-m-d H:i:s'), $message)); } } \ No newline at end of file diff --git a/src/EventListener/IndexPageListener.php b/src/EventListener/IndexPageListener.php index a91e745..05954fe 100644 --- a/src/EventListener/IndexPageListener.php +++ b/src/EventListener/IndexPageListener.php @@ -3,15 +3,13 @@ namespace MummertMedia\ContaoMeilisearchBundle\EventListener; use Contao\Config; -use MummertMedia\ContaoMeilisearchBundle\Service\PdfIndexService; -use MummertMedia\ContaoMeilisearchBundle\Service\OfficeIndexService; +use Contao\System; class IndexPageListener { - public function __construct( - private readonly PdfIndexService $pdfIndexService, - private readonly OfficeIndexService $officeIndexService, - ) {} + public function __construct() + { + } private function debug(string $message, array $context = []): void { @@ -103,7 +101,6 @@ class IndexPageListener $this->debug('Meta: searchimage candidate', ['searchimage' => $searchImage]); if (!empty($searchImage)) { - // >>> HINWEIS: falls dein tl_search-Feld "image" heißt, hier auf $set['image'] ändern! $set['imagepath'] = trim((string) $searchImage); } @@ -139,20 +136,12 @@ class IndexPageListener 'class' => $e::class, ]); } - - $this->debug('Meta: final set snapshot', [ - 'priority' => $set['priority'] ?? null, - 'keywords' => $set['keywords'] ?? null, - 'imagepath' => $set['imagepath'] ?? null, - 'startDate' => $set['startDate'] ?? null, - 'checksum' => $set['checksum'] ?? null, - ]); } } /* * ===================== - * DATEI-INDEXIERUNG (PDF / OFFICE) + * DATEI-ERKENNUNG + UPSERT * ===================== */ if ((int) ($data['protected'] ?? 0) !== 0) { @@ -160,15 +149,13 @@ class IndexPageListener return; } - $indexPdfs = (bool) Config::get('meilisearch_index_pdfs'); - $indexOffice = (bool) Config::get('meilisearch_index_office'); + $indexFiles = (bool) Config::get('meilisearch_index_files'); - $this->debug('File indexing settings', [ - 'meilisearch_index_pdfs' => $indexPdfs, - 'meilisearch_index_office' => $indexOffice, + $this->debug('File indexing setting', [ + 'meilisearch_index_files' => $indexFiles, ]); - if (!$indexPdfs && !$indexOffice) { + if (!$indexFiles) { $this->debug('Abort: file indexing disabled'); return; } @@ -176,61 +163,85 @@ class IndexPageListener $links = $this->findAllLinks($content); $this->debug('Links found', ['count' => count($links)]); - $pdfLinks = []; - $officeLinks = []; + $fileLinks = []; foreach ($links as $link) { $type = $this->detectIndexableFileType($link['url']); - - if ($type === 'pdf' && $indexPdfs) { - $pdfLinks[] = $link; - continue; - } - - if (in_array($type, ['docx', 'xlsx', 'pptx'], true) && $indexOffice) { - $officeLinks[] = $link; + if ($type !== null) { + $fileLinks[] = $link + ['type' => $type]; } } - $this->debug('Indexable file links', [ - 'pdf' => count($pdfLinks), - 'office' => count($officeLinks), + $this->debug('Indexable file links found', [ + 'count' => count($fileLinks), + 'types' => array_count_values(array_column($fileLinks, 'type')), ]); - try { - if ($pdfLinks !== []) { - $this->debug('PDF handlePdfLinks(): call', ['count' => count($pdfLinks)]); - $this->pdfIndexService->handlePdfLinks($pdfLinks); - $this->debug('PDF handlePdfLinks(): ok'); - } + if ($fileLinks) { + $db = System::getContainer()->get('database_connection'); + $time = time(); - if ($officeLinks !== []) { - $this->debug('Office handleOfficeLinks(): call', ['count' => count($officeLinks)]); - $this->officeIndexService->handleOfficeLinks($officeLinks); - $this->debug('Office handleOfficeLinks(): ok'); + foreach ($fileLinks as $file) { + $url = strtok($file['url'], '#'); + + $path = parse_url($url, PHP_URL_PATH); + $abs = $path ? TL_ROOT . '/' . ltrim($path, '/') : null; + + $mtime = ($abs && is_file($abs)) ? filemtime($abs) : 0; + $checksum = md5($url . '|' . $mtime); + + $existing = $db->fetchAssociative( + 'SELECT id, checksum FROM tl_search_files WHERE url = ?', + [$url] + ); + + if ($existing) { + $db->update( + 'tl_search_files', + [ + 'tstamp' => $time, + 'last_seen' => $time, + 'page_id' => (int) ($data['pid'] ?? 0), + 'file_mtime' => $mtime, + 'checksum' => $checksum, + ], + ['id' => $existing['id']] + ); + + $this->debug('File updated', [ + 'url' => $url, + 'checksum' => $checksum, + ]); + } else { + $db->insert( + 'tl_search_files', + [ + 'tstamp' => $time, + 'last_seen' => $time, + 'type' => $file['type'], + 'url' => $url, + 'title' => $file['linkText'] ?? basename($url), + 'page_id' => (int) ($data['pid'] ?? 0), + 'file_mtime' => $mtime, + 'checksum' => $checksum, + ] + ); + + $this->debug('File inserted', [ + 'url' => $url, + 'checksum' => $checksum, + ]); + } } - } catch (\Throwable $e) { - $this->debug('File indexing failed', [ - 'error' => $e->getMessage(), - 'class' => $e::class, - ]); } $this->debug('Hook end', [ 'final_set_keys' => array_keys($set), - 'final_set' => [ - 'priority' => $set['priority'] ?? null, - 'keywords' => $set['keywords'] ?? null, - 'imagepath' => $set['imagepath'] ?? null, - 'startDate' => $set['startDate'] ?? null, - 'checksum' => $set['checksum'] ?? null, - ], ]); } - /** - * Extrahiert MEILISEARCH_JSON aus HTML-Kommentar - */ + /* === Hilfsmethoden unverändert === */ + private function extractMeilisearchJson(string $content): ?array { if (!preg_match('//s', $content, $m)) { @@ -245,9 +256,6 @@ class IndexPageListener : null; } - /** - * Sammle alle Links - */ private function findAllLinks(string $content): array { if (!preg_match_all( @@ -270,12 +278,8 @@ class IndexPageListener return $result; } - /** - * Ermittelt indexierbaren Dateityp (pdf|docx|xlsx|pptx) oder null - */ private function detectIndexableFileType(string $url): ?string { - // Hash entfernen $url = strtok($url, '#'); $parts = parse_url($url); @@ -283,7 +287,6 @@ class IndexPageListener return null; } - // direkter Pfad (/files/…) if (!empty($parts['path'])) { $ext = strtolower(pathinfo($parts['path'], PATHINFO_EXTENSION)); if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) { @@ -291,18 +294,12 @@ class IndexPageListener } } - // Query-Parameter (Contao 4 + 5) if (!empty($parts['query'])) { parse_str($parts['query'], $query); foreach (['file', 'p', 'f'] as $param) { if (!empty($query[$param])) { - $candidate = (string) $query[$param]; - - // sicher decodieren (Contao 4 + 5) - $candidate = html_entity_decode($candidate, ENT_QUOTES); - $candidate = rawurldecode($candidate); - + $candidate = rawurldecode(html_entity_decode((string) $query[$param], ENT_QUOTES)); $ext = strtolower(pathinfo($candidate, PATHINFO_EXTENSION)); if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) { diff --git a/src/Resources/config/services.yaml b/src/Resources/config/services.yaml index b4e4b9e..79ae9db 100644 --- a/src/Resources/config/services.yaml +++ b/src/Resources/config/services.yaml @@ -3,7 +3,7 @@ services: Psr\Container\ContainerInterface: '@service_container' MummertMedia\ContaoMeilisearchBundle\: - resource: '../../{Command,Cron,EventListener,Service}' + resource: '../../{Command,EventListener,Service}' autowire: true autoconfigure: true @@ -19,12 +19,6 @@ services: tags: - { name: contao.hook, hook: indexPage, method: onIndexPage } - MummertMedia\ContaoMeilisearchBundle\Cron\MeilisearchIndexCron: - autowire: true - autoconfigure: false - tags: - - { name: contao.cron, interval: daily, method: __invoke } - MummertMedia\ContaoMeilisearchBundle\Controller\FrontendModule\MeilisearchSearchController: autowire: true autoconfigure: false diff --git a/src/Resources/contao/dca/tl_search_files.php b/src/Resources/contao/dca/tl_search_files.php index 3fb7f1b..8fdc052 100644 --- a/src/Resources/contao/dca/tl_search_files.php +++ b/src/Resources/contao/dca/tl_search_files.php @@ -2,7 +2,7 @@ use Contao\DC_Table; -$GLOBALS['TL_DCA']['tl_search_pdf'] = [ +$GLOBALS['TL_DCA']['tl_search_files'] = [ 'config' => [ 'dataContainer' => DC_Table::class, 'sql' => [ diff --git a/src/Resources/contao/dca/tl_settings.php b/src/Resources/contao/dca/tl_settings.php index 1bde23e..d284ef3 100644 --- a/src/Resources/contao/dca/tl_settings.php +++ b/src/Resources/contao/dca/tl_settings.php @@ -4,14 +4,17 @@ use Contao\CoreBundle\DataContainer\PaletteManipulator; use Contao\System; /** + * ------------------------------------------------- * Fields + * ------------------------------------------------- */ + $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_host'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'rgxp' => 'url', - 'tl_class' => 'w50', + 'rgxp' => 'url', + 'tl_class' => 'w50', ], ]; @@ -19,7 +22,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'tl_class' => 'w50', + 'tl_class' => 'w50', ], ]; @@ -27,7 +30,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_api_write'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'tl_class' => 'w50', + 'tl_class' => 'w50', 'hideInput' => true, ], ]; @@ -36,7 +39,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_api_search'] = [ 'inputType' => 'text', 'eval' => [ 'mandatory' => true, - 'tl_class' => 'w50', + 'tl_class' => 'w50', 'hideInput' => true, ], ]; @@ -55,50 +58,71 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_imagesize'] = [ return $options; }, 'eval' => [ - 'tl_class' => 'w50', - 'chosen' => true, + 'tl_class' => 'w50', + 'chosen' => true, 'includeBlankOption' => true, ], - // 🔥 DAS HAT GEFEHLT 'sql' => "int(10) unsigned NOT NULL default 0", ]; -$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [ - 'inputType' => 'checkbox', - 'eval' => [ - 'tl_class' => 'w50 clr', - ], -]; - $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_fallback_image'] = [ 'inputType' => 'fileTree', 'eval' => [ 'filesOnly' => true, 'fieldType' => 'radio', - 'tl_class' => 'w50', + 'tl_class' => 'w50', ], 'sql' => "varbinary(16) NULL", ]; -$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_pdfs'] = [ - 'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'], +$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [ 'inputType' => 'checkbox', - 'eval' => [ - 'tl_class' => 'w50', + 'eval' => [ + 'tl_class' => 'w50 clr', ], - 'sql' => "char(1) NOT NULL default '1'", -]; - -$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_office'] = [ - 'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office'], - 'inputType' => 'checkbox', - 'eval' => ['tl_class' => 'w50'], - 'sql' => "char(1) NOT NULL default '0'", ]; /** - * Palette + * ------------------------------------------------- + * Datei-Indexierung (Tika) + * ------------------------------------------------- */ + +$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_files'] = [ + 'inputType' => 'checkbox', + 'eval' => [ + 'tl_class' => 'w50', + 'submitOnChange' => true, + ], + 'sql' => "char(1) NOT NULL default '0'", +]; + +$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_tika_url'] = [ + 'inputType' => 'text', + 'eval' => [ + 'rgxp' => 'url', + 'mandatory' => true, + 'tl_class' => 'w50 clr', + ], +]; + +/** + * ------------------------------------------------- + * Selector / Subpalette + * ------------------------------------------------- + */ + +$GLOBALS['TL_DCA']['tl_settings']['palettes']['__selector__'][] = 'meilisearch_index_files'; + +$GLOBALS['TL_DCA']['tl_settings']['subpalettes']['meilisearch_index_files'] + = 'meilisearch_tika_url'; + +/** + * ------------------------------------------------- + * Palette + * ------------------------------------------------- + */ + PaletteManipulator::create() ->addLegend('meilisearch_legend', null, PaletteManipulator::POSITION_AFTER, true) ->addField('meilisearch_host', 'meilisearch_legend') @@ -108,6 +132,5 @@ PaletteManipulator::create() ->addField('meilisearch_imagesize', 'meilisearch_legend') ->addField('meilisearch_fallback_image', 'meilisearch_legend') ->addField('meilisearch_index_past_events', 'meilisearch_legend') - ->addField('meilisearch_index_pdfs', 'meilisearch_legend') - ->addField('meilisearch_index_office', 'meilisearch_legend') + ->addField('meilisearch_index_files', 'meilisearch_legend') ->applyToPalette('default', 'tl_settings'); \ No newline at end of file diff --git a/src/Resources/contao/languages/de/tl_settings.php b/src/Resources/contao/languages/de/tl_settings.php index 883f8cb..e5bdaf3 100644 --- a/src/Resources/contao/languages/de/tl_settings.php +++ b/src/Resources/contao/languages/de/tl_settings.php @@ -28,10 +28,10 @@ $GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][0] $GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][1] = 'Vergangene Kalender-Events werden ebenfalls in Meilisearch indexiert.'; -$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'] = [ - 'PDFs indexieren', - 'Aktiviert die Indexierung von PDF-Dateien für die Suche.', +$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_files'] = [ + 'Dateien indexieren', + 'Aktiviert die Indexierung von PDF-Dateien sowie DOCX, XLSX und PPTX.', ]; -$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office'] - = ['Office-Dateien indexieren', 'DOCX, XLSX und PPTX in die Suche aufnehmen.']; \ No newline at end of file +$GLOBALS['TL_LANG']['tl_settings']['meilisearch_tika_url'] + = ['Apache Tika URL', 'URL der Apache Tika Instanz (z. B. https://tika.domain.tld).']; \ No newline at end of file