Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
99.05% covered (success)
99.05%
104 / 105
90.00% covered (success)
90.00%
9 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
SearchUtils
99.05% covered (success)
99.05%
104 / 105
90.00% covered (success)
90.00%
9 / 10
24
0.00% covered (danger)
0.00%
0 / 1
 getSearchResults
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 getPageSearchResults
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
1
 getDateCriteria
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
2
 getStaticSearchResults
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 getScoredSearchResult
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
2
 getDateFormattings
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
2
 analyze
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
4
 getCutout
100.00% covered (success)
100.00%
18 / 18
100.00% covered (success)
100.00%
1 / 1
5
 highlight
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
2
 fromEnv
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace Olz\Suche\Utils;
4
5use Doctrine\Common\Collections\Criteria;
6use Doctrine\Common\Collections\Expr\Expression;
7use Olz\Apps\Anmelden\Components\OlzAnmelden\OlzAnmelden;
8use Olz\Apps\Commands\Components\OlzCommands\OlzCommands;
9use Olz\Apps\Files\Components\OlzFiles\OlzFiles;
10use Olz\Apps\Logs\Components\OlzLogs\OlzLogs;
11use Olz\Apps\Members\Components\OlzMembers\OlzMembers;
12use Olz\Apps\Monitoring\Components\OlzMonitoring\OlzMonitoring;
13use Olz\Apps\Newsletter\Components\OlzNewsletter\OlzNewsletter;
14use Olz\Apps\Oev\Components\OlzOev\OlzOev;
15use Olz\Apps\Panini2024\Components\OlzPanini2024\OlzPanini2024;
16use Olz\Apps\Panini2024\Components\OlzPanini2024All\OlzPanini2024All;
17use Olz\Apps\Panini2024\Components\OlzPanini2024Masks\OlzPanini2024Masks;
18use Olz\Apps\Quiz\Components\OlzQuiz\OlzQuiz;
19use Olz\Apps\Results\Components\OlzResults\OlzResults;
20use Olz\Apps\SearchEngines\Components\OlzSearchEngines\OlzSearchEngines;
21use Olz\Apps\Statistics\Components\OlzStatistics\OlzStatistics;
22use Olz\Apps\Youtube\Components\OlzYoutube\OlzYoutube;
23use Olz\Components\Auth\OlzEmailReaktion\OlzEmailReaktion;
24use Olz\Components\Common\OlzRootComponent;
25use Olz\Components\OlzHtmlSitemap\OlzHtmlSitemap;
26use Olz\Components\OtherPages\OlzDatenschutz\OlzDatenschutz;
27use Olz\Components\OtherPages\OlzFuerEinsteiger\OlzFuerEinsteiger;
28use Olz\Components\OtherPages\OlzMaterial\OlzMaterial;
29use Olz\Faq\Components\OlzFaqDetail\OlzFaqDetail;
30use Olz\Faq\Components\OlzFaqList\OlzFaqList;
31use Olz\Karten\Components\OlzKarteDetail\OlzKarteDetail;
32use Olz\Karten\Components\OlzKarten\OlzKarten;
33use Olz\News\Components\OlzNewsDetail\OlzNewsDetail;
34use Olz\News\Components\OlzNewsList\OlzNewsList;
35use Olz\Roles\Components\OlzRolePage\OlzRolePage;
36use Olz\Roles\Components\OlzVerein\OlzVerein;
37use Olz\Service\Components\OlzService\OlzService;
38use Olz\Startseite\Components\OlzStartseite\OlzStartseite;
39use Olz\Suche\Components\OlzSuche\OlzSuche;
40use Olz\Termine\Components\OlzTerminDetail\OlzTerminDetail;
41use Olz\Termine\Components\OlzTermineList\OlzTermineList;
42use Olz\Termine\Components\OlzTerminLocationDetail\OlzTerminLocationDetail;
43use Olz\Termine\Components\OlzTerminLocationsList\OlzTerminLocationsList;
44use Olz\Termine\Components\OlzTerminTemplateDetail\OlzTerminTemplateDetail;
45use Olz\Termine\Components\OlzTerminTemplatesList\OlzTerminTemplatesList;
46use Olz\Users\Components\OlzUserDetail\OlzUserDetail;
47use Olz\Utils\WithUtilsTrait;
48
49/**
50 * @phpstan-type SearchResult array{
51 *   score: float,
52 *   link: non-empty-string,
53 *   icon: ?non-empty-string,
54 *   date: ?\DateTime,
55 *   title: non-empty-string,
56 *   text: ?non-empty-string,
57 * }
58 * @phpstan-type PageSearchResults array{
59 *   title: non-empty-string,
60 *   bestScore: ?float,
61 *   results: array<SearchResult>,
62 * }
63 */
64class SearchUtils {
65    use WithUtilsTrait;
66
67    /** @var array<class-string<OlzRootComponent<mixed>>> Page component classes that getSearchResults() iterates over. */
68    protected static array $all_page_classes = [
69        // Every class that extends `OlzRootComponent` should be listed here (only listed classes are searched):
70        OlzAnmelden::class,
71        OlzCommands::class,
72        OlzFiles::class,
73        OlzLogs::class,
74        OlzMembers::class,
75        OlzMonitoring::class,
76        OlzNewsletter::class,
77        OlzOev::class,
78        OlzPanini2024::class,
79        OlzPanini2024All::class,
80        OlzPanini2024Masks::class,
81        OlzQuiz::class,
82        OlzResults::class,
83        OlzSearchEngines::class,
84        OlzStatistics::class,
85        OlzYoutube::class,
86        OlzEmailReaktion::class,
87        OlzHtmlSitemap::class,
88        OlzDatenschutz::class,
89        OlzFuerEinsteiger::class,
90        OlzMaterial::class,
91        OlzFaqDetail::class,
92        OlzFaqList::class,
93        OlzKarteDetail::class,
94        OlzKarten::class,
95        OlzNewsDetail::class,
96        OlzNewsList::class,
97        OlzRolePage::class,
98        OlzVerein::class,
99        OlzService::class,
100        OlzStartseite::class,
101        OlzSuche::class,
102        OlzTerminDetail::class,
103        OlzTermineList::class,
104        OlzTerminLocationDetail::class,
105        OlzTerminLocationsList::class,
106        OlzTerminTemplateDetail::class,
107        OlzTerminTemplatesList::class,
108        OlzUserDetail::class,
109    ];
110
111    /**
112     * @param array<string> $terms
113     *
114     * @return array<PageSearchResults>
115     */
116    public function getSearchResults(array $terms): array {
117        $results = [];
118        foreach (self::$all_page_classes as $page_class) {
119            $results[] = $this->getPageSearchResults($page_class, $terms);
120        }
121        usort($results, fn ($a, $b) => $b['bestScore'] <=> $a['bestScore']);
122        return $results;
123    }
124
125    /**
126     * @param class-string<OlzRootComponent<array<string, mixed>>> $page_class
127     * @param array<string>                                        $terms
128     *
129     * @return PageSearchResults
130     */
131    protected function getPageSearchResults(string $page_class, array $terms): array {
132        $page = new $page_class();
133        $results = $page->getSearchResults($terms);
134        usort($results, fn ($a, $b) => $b['score'] <=> $a['score']);
135        $first_result = $results[0] ?? null;
136        $best_score = $first_result['score'] ?? null;
137        return [
138            'title' => $page->getSearchTitle(),
139            'bestScore' => $best_score,
140            'results' => $results,
141        ];
142    }
143
144    /** @return array<Expression> */
145    public function getDateCriteria(string $field, string $term): array {
146        $result = $this->dateUtils()->parseDateTimeRange($term);
147        if ($result === null) {
148            return [];
149        }
150        return [Criteria::expr()->andX(
151            Criteria::expr()->gte($field, $result['start']),
152            Criteria::expr()->lt($field, $result['end']),
153        )];
154    }
155
156    /**
157     * @param array<string> $terms
158     * @param array{
159     *   link: non-empty-string,
160     *   icon?: ?non-empty-string,
161     *   date?: ?\DateTime,
162     *   title: non-empty-string,
163     * } $defaults
164     *
165     * @return array<SearchResult>
166     */
167    public function getStaticSearchResults(
168        string $content,
169        array $terms,
170        array $defaults,
171    ): array {
172        $search_space = "{$content} {$defaults['title']}";
173        $analysis = $this->analyze($search_space, $defaults['date'] ?? null, $terms);
174        if (!$analysis['hasAll']) {
175            return [];
176        }
177        return [
178            [
179                'score' => $analysis['score'],
180                'icon' => null,
181                'date' => null,
182                'text' => $this->searchUtils()->getCutout($content, $terms) ?: null,
183                ...$defaults,
184            ],
185        ];
186    }
187
188    /**
189     * @param array{
190     *   link: non-empty-string,
191     *   icon?: ?non-empty-string,
192     *   date?: ?\DateTime,
193     *   title: non-empty-string,
194     *   text?: ?non-empty-string,
195     * } $result
196     * @param array<string> $terms
197     *
198     * @return SearchResult
199     */
200    public function getScoredSearchResult(
201        array $result,
202        array $terms,
203    ): array {
204        $text_str = $result['text'] ?? '';
205        $search_space = "{$text_str} {$result['title']}";
206        $analysis = $this->analyze($search_space, $result['date'] ?? null, $terms);
207        return [
208            'icon' => null,
209            'date' => null,
210            ...$result,
211            'score' => $analysis['score'],
212            'text' => $this->searchUtils()->getCutout($text_str, $terms) ?: null,
213        ];
214    }
215
216    /** @return array<string> */
217    public function getDateFormattings(?\DateTime $date): array {
218        if ($date === null) {
219            return [];
220        }
221        return [
222            $date->format('Y-m-d'),
223            $date->format('d.m.Y'),
224            $date->format('j.n.Y'),
225        ];
226    }
227
228    /**
229     * @param array<string> $terms
230     *
231     * @return array{score: float, hasAll: bool}
232     */
233    public function analyze(string $content, ?\DateTime $date, array $terms): array {
234        $date_formattings = implode(' ', $this->getDateFormattings($date));
235        $has_all = true;
236        $sum_occurrences = 0;
237        foreach ($terms as $term) {
238            $esc_term = preg_quote($term);
239            $num_occurrences = preg_match_all("/{$esc_term}/i", $content, $matches);
240            if (preg_match("/{$esc_term}/i", $date_formattings)) {
241                $num_occurrences++;
242            }
243            $sum_occurrences += $num_occurrences;
244            if (!$num_occurrences) {
245                $has_all = false;
246            }
247        }
248        $score = round(1 - (1 / ($sum_occurrences / count($terms) + 1)), 5);
249        return ['score' => $score, 'hasAll' => $has_all];
250    }
251
252    /** @param array<string> $search_terms */
253    public function getCutout(string $text, array $search_terms): string {
254        $length_a = 40;
255        $length_b = 40;
256
257        $lowercase_text = strtolower($text);
258        $start = 0;
259        foreach ($search_terms as $search_term) {
260            $search_key = strtolower($search_term);
261            $start = strpos($lowercase_text, $search_key);
262            if ($start > 0) {
263                break;
264            }
265        }
266        $prefix = "...";
267        $suffix = "...";
268        if (($start - $length_a) < 0) {
269            $start = $length_a;
270            $prefix = "";
271        }
272        if (strlen($text) < ($length_a + $length_b)) {
273            $suffix = "";
274        }
275        $text = substr($text, $start - $length_a, $length_a + $length_b);
276        return "{$prefix}{$text}{$suffix}";
277    }
278
279    /** @param array<string> $search_terms */
280    public function highlight(string $text, array $search_terms): string {
281        $start_token = '\[';
282        $end_token = '\]';
283        $tokens = [$start_token, $end_token];
284        $text = $this->generalUtils()->escape($text, $tokens);
285        foreach ($search_terms as $term) {
286            $esc_term = preg_quote($this->generalUtils()->escape($term, $tokens), '/');
287            $text = preg_replace(
288                "/(?<!\\\\)({$esc_term})/i",
289                "{$start_token}\\1{$end_token}",
290                $text ?? '',
291            );
292        }
293        $start_tag = '<span class="highlight">';
294        $end_tag = '</span>';
295        $esc_start_token = preg_quote($start_token, '/');
296        $esc_end_token = preg_quote($end_token, '/');
297        $text = preg_replace(
298            ["/(?<!\\\\){$esc_start_token}/", "/(?<!\\\\){$esc_end_token}/"],
299            [$start_tag, $end_tag],
300            $text ?? '',
301        );
302        return $this->generalUtils()->unescape($text ?? '', $tokens);
303    }
304
305    public static function fromEnv(): self {
306        return new self();
307    }
308}