diff --git a/src/Command/MeilisearchFilesCleanupCommand.php b/src/Command/MeilisearchFilesCleanupCommand.php
index 2b88cd0..d968c62 100644
--- a/src/Command/MeilisearchFilesCleanupCommand.php
+++ b/src/Command/MeilisearchFilesCleanupCommand.php
@@ -3,7 +3,7 @@
namespace MummertMedia\ContaoMeilisearchBundle\Command;
use Contao\CoreBundle\Framework\ContaoFramework;
-use Contao\Database;
+use Doctrine\DBAL\Connection;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
@@ -13,6 +13,7 @@ class MeilisearchFilesCleanupCommand extends Command
{
public function __construct(
private readonly ContaoFramework $framework,
+ private readonly Connection $connection,
) {
parent::__construct();
}
@@ -21,13 +22,13 @@ class MeilisearchFilesCleanupCommand extends Command
{
$this
->setName('meilisearch:files:cleanup')
- ->setDescription('Remove stale indexed files (PDF, DOCX, XLSX, PPTX) from tl_search_pdf')
+ ->setDescription('Remove stale indexed files from tl_search_files')
->addOption(
'grace',
null,
InputOption::VALUE_OPTIONAL,
'Grace period in seconds (files newer than now-grace are kept)',
- 86400 // 24 Stunden
+ 86400
)
->addOption(
'dry-run',
@@ -49,10 +50,10 @@ class MeilisearchFilesCleanupCommand extends Command
$cutoff = time() - $grace;
if ($dryRun) {
- $count = Database::getInstance()
- ->prepare('SELECT COUNT(*) AS cnt FROM tl_search_pdf WHERE last_seen < ?')
- ->execute($cutoff)
- ->cnt;
+ $count = $this->connection->fetchOne(
+ 'SELECT COUNT(*) FROM tl_search_files WHERE last_seen < ?',
+ [$cutoff]
+ );
$message = sprintf(
'[DRY-RUN] %d stale file(s) would be removed (last_seen < %s)',
@@ -63,14 +64,14 @@ class MeilisearchFilesCleanupCommand extends Command
$output->writeln('' . $message . '');
$this->log($message);
- $this->log('Cleaner successfully stopped');
+ $this->log('Cleaner stopped (dry-run)');
return Command::SUCCESS;
}
- $affected = Database::getInstance()
- ->prepare('DELETE FROM tl_search_pdf WHERE last_seen < ?')
- ->execute($cutoff)
- ->affectedRows;
+ $affected = $this->connection->executeStatement(
+ 'DELETE FROM tl_search_files WHERE last_seen < ?',
+ [$cutoff]
+ );
$message = sprintf(
'Removed %d stale file(s) (last_seen < %s)',
@@ -92,15 +93,8 @@ class MeilisearchFilesCleanupCommand extends Command
}
}
- /**
- * Einheitliches Logging mit Zeitstempel
- */
private function log(string $message): void
{
- error_log(sprintf(
- '[%s] %s',
- date('Y-m-d H:i:s'),
- $message
- ));
+ error_log(sprintf('[%s] %s', date('Y-m-d H:i:s'), $message));
}
}
\ No newline at end of file
diff --git a/src/EventListener/IndexPageListener.php b/src/EventListener/IndexPageListener.php
index a91e745..05954fe 100644
--- a/src/EventListener/IndexPageListener.php
+++ b/src/EventListener/IndexPageListener.php
@@ -3,15 +3,13 @@
namespace MummertMedia\ContaoMeilisearchBundle\EventListener;
use Contao\Config;
-use MummertMedia\ContaoMeilisearchBundle\Service\PdfIndexService;
-use MummertMedia\ContaoMeilisearchBundle\Service\OfficeIndexService;
+use Contao\System;
class IndexPageListener
{
- public function __construct(
- private readonly PdfIndexService $pdfIndexService,
- private readonly OfficeIndexService $officeIndexService,
- ) {}
+ public function __construct()
+ {
+ }
private function debug(string $message, array $context = []): void
{
@@ -103,7 +101,6 @@ class IndexPageListener
$this->debug('Meta: searchimage candidate', ['searchimage' => $searchImage]);
if (!empty($searchImage)) {
- // >>> HINWEIS: falls dein tl_search-Feld "image" heißt, hier auf $set['image'] ändern!
$set['imagepath'] = trim((string) $searchImage);
}
@@ -139,20 +136,12 @@ class IndexPageListener
'class' => $e::class,
]);
}
-
- $this->debug('Meta: final set snapshot', [
- 'priority' => $set['priority'] ?? null,
- 'keywords' => $set['keywords'] ?? null,
- 'imagepath' => $set['imagepath'] ?? null,
- 'startDate' => $set['startDate'] ?? null,
- 'checksum' => $set['checksum'] ?? null,
- ]);
}
}
/*
* =====================
- * DATEI-INDEXIERUNG (PDF / OFFICE)
+ * DATEI-ERKENNUNG + UPSERT
* =====================
*/
if ((int) ($data['protected'] ?? 0) !== 0) {
@@ -160,15 +149,13 @@ class IndexPageListener
return;
}
- $indexPdfs = (bool) Config::get('meilisearch_index_pdfs');
- $indexOffice = (bool) Config::get('meilisearch_index_office');
+ $indexFiles = (bool) Config::get('meilisearch_index_files');
- $this->debug('File indexing settings', [
- 'meilisearch_index_pdfs' => $indexPdfs,
- 'meilisearch_index_office' => $indexOffice,
+ $this->debug('File indexing setting', [
+ 'meilisearch_index_files' => $indexFiles,
]);
- if (!$indexPdfs && !$indexOffice) {
+ if (!$indexFiles) {
$this->debug('Abort: file indexing disabled');
return;
}
@@ -176,61 +163,109 @@ class IndexPageListener
         $links = $this->findAllLinks($content);
         $this->debug('Links found', ['count' => count($links)]);
-        $pdfLinks = [];
-        $officeLinks = [];
+        $fileLinks = [];
         foreach ($links as $link) {
             $type = $this->detectIndexableFileType($link['url']);
-
-            if ($type === 'pdf' && $indexPdfs) {
-                $pdfLinks[] = $link;
-                continue;
-            }
-
-            if (in_array($type, ['docx', 'xlsx', 'pptx'], true) && $indexOffice) {
-                $officeLinks[] = $link;
+            if ($type !== null) {
+                $fileLinks[] = $link + ['type' => $type];
             }
         }
-        $this->debug('Indexable file links', [
-            'pdf' => count($pdfLinks),
-            'office' => count($officeLinks),
+        $this->debug('Indexable file links found', [
+            'count' => count($fileLinks),
+            'types' => array_count_values(array_column($fileLinks, 'type')),
         ]);
         try {
-            if ($pdfLinks !== []) {
-                $this->debug('PDF handlePdfLinks(): call', ['count' => count($pdfLinks)]);
-                $this->pdfIndexService->handlePdfLinks($pdfLinks);
-                $this->debug('PDF handlePdfLinks(): ok');
-            }
+            // Resolve collaborators inside the try block so lookup failures are logged below, not fatal.
+            $db = System::getContainer()->get('database_connection');
+            // TL_ROOT is a legacy constant (removed in Contao 5); kernel.project_dir is the supported equivalent.
+            $projectDir = (string) System::getContainer()->getParameter('kernel.project_dir');
+            $time = time();
+            $pageId = (int) ($data['pid'] ?? 0);
+            $seen = [];
-            if ($officeLinks !== []) {
-                $this->debug('Office handleOfficeLinks(): call', ['count' => count($officeLinks)]);
-                $this->officeIndexService->handleOfficeLinks($officeLinks);
-                $this->debug('Office handleOfficeLinks(): ok');
+            foreach ($fileLinks as $file) {
+                // Strip the fragment; the cast covers strtok() returning false for an empty URL.
+                $url = (string) strtok((string) $file['url'], '#');
+
+                // The same file linked several times on one page needs only one upsert.
+                if ($url === '' || isset($seen[$url])) {
+                    continue;
+                }
+
+                $seen[$url] = true;
+
+                // URL paths are percent-encoded ("My%20File.pdf"); decode before hitting the filesystem.
+                // NOTE(review): download links that carry the file in a query parameter (file/p/f)
+                // still resolve to the page path here and get mtime 0 - confirm whether that is intended.
+                $path = parse_url($url, PHP_URL_PATH);
+                $abs = is_string($path) && $path !== ''
+                    ? $projectDir . '/' . ltrim(rawurldecode($path), '/')
+                    : null;
+
+                // filemtime() may return false; the checksum changes whenever the file is modified.
+                $mtime = ($abs !== null && is_file($abs)) ? (int) filemtime($abs) : 0;
+                $checksum = md5($url . '|' . $mtime);
+
+                $existing = $db->fetchAssociative(
+                    'SELECT id, checksum FROM tl_search_files WHERE url = ?',
+                    [$url]
+                );
+
+                if ($existing) {
+                    $db->update(
+                        'tl_search_files',
+                        [
+                            'tstamp' => $time,
+                            'last_seen' => $time,
+                            'page_id' => $pageId,
+                            'file_mtime' => $mtime,
+                            'checksum' => $checksum,
+                        ],
+                        ['id' => $existing['id']]
+                    );
+
+                    $this->debug('File updated', [
+                        'url' => $url,
+                        'checksum' => $checksum,
+                    ]);
+                } else {
+                    $db->insert(
+                        'tl_search_files',
+                        [
+                            'tstamp' => $time,
+                            'last_seen' => $time,
+                            'type' => $file['type'],
+                            'url' => $url,
+                            'title' => $file['linkText'] ?? basename($url),
+                            'page_id' => $pageId,
+                            'file_mtime' => $mtime,
+                            'checksum' => $checksum,
+                        ]
+                    );
+
+                    $this->debug('File inserted', [
+                        'url' => $url,
+                        'checksum' => $checksum,
+                    ]);
+                }
             }
         } catch (\Throwable $e) {
             $this->debug('File indexing failed', [
                 'error' => $e->getMessage(),
                 'class' => $e::class,
             ]);
         }
$this->debug('Hook end', [
'final_set_keys' => array_keys($set),
- 'final_set' => [
- 'priority' => $set['priority'] ?? null,
- 'keywords' => $set['keywords'] ?? null,
- 'imagepath' => $set['imagepath'] ?? null,
- 'startDate' => $set['startDate'] ?? null,
- 'checksum' => $set['checksum'] ?? null,
- ],
]);
}
- /**
- * Extrahiert MEILISEARCH_JSON aus HTML-Kommentar
- */
+ /* === Helper methods === */
+
private function extractMeilisearchJson(string $content): ?array
{
if (!preg_match('//s', $content, $m)) {
@@ -245,9 +256,6 @@ class IndexPageListener
: null;
}
- /**
- * Sammle alle Links
- */
private function findAllLinks(string $content): array
{
if (!preg_match_all(
@@ -270,12 +278,8 @@ class IndexPageListener
return $result;
}
- /**
- * Ermittelt indexierbaren Dateityp (pdf|docx|xlsx|pptx) oder null
- */
private function detectIndexableFileType(string $url): ?string
{
- // Hash entfernen
$url = strtok($url, '#');
$parts = parse_url($url);
@@ -283,7 +287,6 @@ class IndexPageListener
return null;
}
- // direkter Pfad (/files/…)
if (!empty($parts['path'])) {
$ext = strtolower(pathinfo($parts['path'], PATHINFO_EXTENSION));
if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) {
@@ -291,18 +294,12 @@ class IndexPageListener
}
}
- // Query-Parameter (Contao 4 + 5)
if (!empty($parts['query'])) {
parse_str($parts['query'], $query);
foreach (['file', 'p', 'f'] as $param) {
if (!empty($query[$param])) {
- $candidate = (string) $query[$param];
-
- // sicher decodieren (Contao 4 + 5)
- $candidate = html_entity_decode($candidate, ENT_QUOTES);
- $candidate = rawurldecode($candidate);
-
+ $candidate = rawurldecode(html_entity_decode((string) $query[$param], ENT_QUOTES));
$ext = strtolower(pathinfo($candidate, PATHINFO_EXTENSION));
if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) {
diff --git a/src/Resources/config/services.yaml b/src/Resources/config/services.yaml
index b4e4b9e..79ae9db 100644
--- a/src/Resources/config/services.yaml
+++ b/src/Resources/config/services.yaml
@@ -3,7 +3,7 @@ services:
Psr\Container\ContainerInterface: '@service_container'
MummertMedia\ContaoMeilisearchBundle\:
- resource: '../../{Command,Cron,EventListener,Service}'
+ resource: '../../{Command,EventListener,Service}'
autowire: true
autoconfigure: true
@@ -19,12 +19,6 @@ services:
tags:
- { name: contao.hook, hook: indexPage, method: onIndexPage }
- MummertMedia\ContaoMeilisearchBundle\Cron\MeilisearchIndexCron:
- autowire: true
- autoconfigure: false
- tags:
- - { name: contao.cron, interval: daily, method: __invoke }
-
MummertMedia\ContaoMeilisearchBundle\Controller\FrontendModule\MeilisearchSearchController:
autowire: true
autoconfigure: false
diff --git a/src/Resources/contao/dca/tl_search_files.php b/src/Resources/contao/dca/tl_search_files.php
index 3fb7f1b..8fdc052 100644
--- a/src/Resources/contao/dca/tl_search_files.php
+++ b/src/Resources/contao/dca/tl_search_files.php
@@ -2,7 +2,7 @@
use Contao\DC_Table;
-$GLOBALS['TL_DCA']['tl_search_pdf'] = [
+$GLOBALS['TL_DCA']['tl_search_files'] = [
'config' => [
'dataContainer' => DC_Table::class,
'sql' => [
diff --git a/src/Resources/contao/dca/tl_settings.php b/src/Resources/contao/dca/tl_settings.php
index 1bde23e..d284ef3 100644
--- a/src/Resources/contao/dca/tl_settings.php
+++ b/src/Resources/contao/dca/tl_settings.php
@@ -4,14 +4,17 @@ use Contao\CoreBundle\DataContainer\PaletteManipulator;
use Contao\System;
/**
+ * -------------------------------------------------
* Fields
+ * -------------------------------------------------
*/
+
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_host'] = [
'inputType' => 'text',
'eval' => [
'mandatory' => true,
- 'rgxp' => 'url',
- 'tl_class' => 'w50',
+ 'rgxp' => 'url',
+ 'tl_class' => 'w50',
],
];
@@ -19,7 +22,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index'] = [
'inputType' => 'text',
'eval' => [
'mandatory' => true,
- 'tl_class' => 'w50',
+ 'tl_class' => 'w50',
],
];
@@ -27,7 +30,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_api_write'] = [
'inputType' => 'text',
'eval' => [
'mandatory' => true,
- 'tl_class' => 'w50',
+ 'tl_class' => 'w50',
'hideInput' => true,
],
];
@@ -36,7 +39,7 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_api_search'] = [
'inputType' => 'text',
'eval' => [
'mandatory' => true,
- 'tl_class' => 'w50',
+ 'tl_class' => 'w50',
'hideInput' => true,
],
];
@@ -55,50 +58,71 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_imagesize'] = [
return $options;
},
'eval' => [
- 'tl_class' => 'w50',
- 'chosen' => true,
+ 'tl_class' => 'w50',
+ 'chosen' => true,
'includeBlankOption' => true,
],
- // 🔥 DAS HAT GEFEHLT
'sql' => "int(10) unsigned NOT NULL default 0",
];
-$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [
- 'inputType' => 'checkbox',
- 'eval' => [
- 'tl_class' => 'w50 clr',
- ],
-];
-
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_fallback_image'] = [
'inputType' => 'fileTree',
'eval' => [
'filesOnly' => true,
'fieldType' => 'radio',
- 'tl_class' => 'w50',
+ 'tl_class' => 'w50',
],
'sql' => "varbinary(16) NULL",
];
-$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_pdfs'] = [
- 'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'],
+$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [
'inputType' => 'checkbox',
- 'eval' => [
- 'tl_class' => 'w50',
+ 'eval' => [
+ 'tl_class' => 'w50 clr',
],
- 'sql' => "char(1) NOT NULL default '1'",
-];
-
-$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_office'] = [
- 'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office'],
- 'inputType' => 'checkbox',
- 'eval' => ['tl_class' => 'w50'],
- 'sql' => "char(1) NOT NULL default '0'",
];
/**
- * Palette
+ * -------------------------------------------------
+ * File indexing (Tika)
+ * -------------------------------------------------
*/
+
+$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_files'] = [
+ 'inputType' => 'checkbox',
+ 'eval' => [
+ 'tl_class' => 'w50',
+ 'submitOnChange' => true,
+ ],
+ 'sql' => "char(1) NOT NULL default '0'",
+];
+
+$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_tika_url'] = [
+ 'inputType' => 'text',
+ 'eval' => [
+ 'rgxp' => 'url',
+ 'mandatory' => true,
+ 'tl_class' => 'w50 clr',
+ ],
+];
+
+/**
+ * -------------------------------------------------
+ * Selector / Subpalette
+ * -------------------------------------------------
+ */
+
+$GLOBALS['TL_DCA']['tl_settings']['palettes']['__selector__'][] = 'meilisearch_index_files';
+
+$GLOBALS['TL_DCA']['tl_settings']['subpalettes']['meilisearch_index_files']
+ = 'meilisearch_tika_url';
+
+/**
+ * -------------------------------------------------
+ * Palette
+ * -------------------------------------------------
+ */
+
PaletteManipulator::create()
->addLegend('meilisearch_legend', null, PaletteManipulator::POSITION_AFTER, true)
->addField('meilisearch_host', 'meilisearch_legend')
@@ -108,6 +132,5 @@ PaletteManipulator::create()
->addField('meilisearch_imagesize', 'meilisearch_legend')
->addField('meilisearch_fallback_image', 'meilisearch_legend')
->addField('meilisearch_index_past_events', 'meilisearch_legend')
- ->addField('meilisearch_index_pdfs', 'meilisearch_legend')
- ->addField('meilisearch_index_office', 'meilisearch_legend')
+ ->addField('meilisearch_index_files', 'meilisearch_legend')
->applyToPalette('default', 'tl_settings');
\ No newline at end of file
diff --git a/src/Resources/contao/languages/de/tl_settings.php b/src/Resources/contao/languages/de/tl_settings.php
index 883f8cb..e5bdaf3 100644
--- a/src/Resources/contao/languages/de/tl_settings.php
+++ b/src/Resources/contao/languages/de/tl_settings.php
@@ -28,10 +28,10 @@ $GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][0]
$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][1]
= 'Vergangene Kalender-Events werden ebenfalls in Meilisearch indexiert.';
-$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'] = [
- 'PDFs indexieren',
- 'Aktiviert die Indexierung von PDF-Dateien für die Suche.',
+$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_files'] = [
+ 'Dateien indexieren',
+ 'Aktiviert die Indexierung von PDF-Dateien sowie DOCX, XLSX und PPTX.',
];
-$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office']
- = ['Office-Dateien indexieren', 'DOCX, XLSX und PPTX in die Suche aufnehmen.'];
\ No newline at end of file
+$GLOBALS['TL_LANG']['tl_settings']['meilisearch_tika_url']
+ = ['Apache Tika URL', 'URL der Apache Tika Instanz (z. B. https://tika.domain.tld).'];
\ No newline at end of file