Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
99.05% |
104 / 105 |
|
90.00% |
9 / 10 |
CRAP | |
0.00% |
0 / 1 |
SearchUtils | |
99.05% |
104 / 105 |
|
90.00% |
9 / 10 |
24 | |
0.00% |
0 / 1 |
getSearchResults | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getPageSearchResults | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
getDateCriteria | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
getStaticSearchResults | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
getScoredSearchResult | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getDateFormattings | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
analyze | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
4 | |||
getCutout | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
5 | |||
highlight | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
2 | |||
fromEnv | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Olz\Suche\Utils; |
4 | |
5 | use Doctrine\Common\Collections\Criteria; |
6 | use Doctrine\Common\Collections\Expr\Expression; |
7 | use Olz\Apps\Anmelden\Components\OlzAnmelden\OlzAnmelden; |
8 | use Olz\Apps\Commands\Components\OlzCommands\OlzCommands; |
9 | use Olz\Apps\Files\Components\OlzFiles\OlzFiles; |
10 | use Olz\Apps\Logs\Components\OlzLogs\OlzLogs; |
11 | use Olz\Apps\Members\Components\OlzMembers\OlzMembers; |
12 | use Olz\Apps\Monitoring\Components\OlzMonitoring\OlzMonitoring; |
13 | use Olz\Apps\Newsletter\Components\OlzNewsletter\OlzNewsletter; |
14 | use Olz\Apps\Oev\Components\OlzOev\OlzOev; |
15 | use Olz\Apps\Panini2024\Components\OlzPanini2024\OlzPanini2024; |
16 | use Olz\Apps\Panini2024\Components\OlzPanini2024All\OlzPanini2024All; |
17 | use Olz\Apps\Panini2024\Components\OlzPanini2024Masks\OlzPanini2024Masks; |
18 | use Olz\Apps\Quiz\Components\OlzQuiz\OlzQuiz; |
19 | use Olz\Apps\Results\Components\OlzResults\OlzResults; |
20 | use Olz\Apps\SearchEngines\Components\OlzSearchEngines\OlzSearchEngines; |
21 | use Olz\Apps\Statistics\Components\OlzStatistics\OlzStatistics; |
22 | use Olz\Apps\Youtube\Components\OlzYoutube\OlzYoutube; |
23 | use Olz\Components\Auth\OlzEmailReaktion\OlzEmailReaktion; |
24 | use Olz\Components\Common\OlzRootComponent; |
25 | use Olz\Components\OlzHtmlSitemap\OlzHtmlSitemap; |
26 | use Olz\Components\OtherPages\OlzDatenschutz\OlzDatenschutz; |
27 | use Olz\Components\OtherPages\OlzFuerEinsteiger\OlzFuerEinsteiger; |
28 | use Olz\Components\OtherPages\OlzMaterial\OlzMaterial; |
29 | use Olz\Faq\Components\OlzFaqDetail\OlzFaqDetail; |
30 | use Olz\Faq\Components\OlzFaqList\OlzFaqList; |
31 | use Olz\Karten\Components\OlzKarteDetail\OlzKarteDetail; |
32 | use Olz\Karten\Components\OlzKarten\OlzKarten; |
33 | use Olz\News\Components\OlzNewsDetail\OlzNewsDetail; |
34 | use Olz\News\Components\OlzNewsList\OlzNewsList; |
35 | use Olz\Roles\Components\OlzRolePage\OlzRolePage; |
36 | use Olz\Roles\Components\OlzVerein\OlzVerein; |
37 | use Olz\Service\Components\OlzService\OlzService; |
38 | use Olz\Startseite\Components\OlzStartseite\OlzStartseite; |
39 | use Olz\Suche\Components\OlzSuche\OlzSuche; |
40 | use Olz\Termine\Components\OlzTerminDetail\OlzTerminDetail; |
41 | use Olz\Termine\Components\OlzTermineList\OlzTermineList; |
42 | use Olz\Termine\Components\OlzTerminLocationDetail\OlzTerminLocationDetail; |
43 | use Olz\Termine\Components\OlzTerminLocationsList\OlzTerminLocationsList; |
44 | use Olz\Termine\Components\OlzTerminTemplateDetail\OlzTerminTemplateDetail; |
45 | use Olz\Termine\Components\OlzTerminTemplatesList\OlzTerminTemplatesList; |
46 | use Olz\Users\Components\OlzUserDetail\OlzUserDetail; |
47 | use Olz\Utils\WithUtilsTrait; |
48 | |
/**
 * Helpers for the site search: collects the results of all searchable pages,
 * scores content against search terms and prepares text snippets.
 *
 * @phpstan-type SearchResult array{
 *   score: float,
 *   link: non-empty-string,
 *   icon: ?non-empty-string,
 *   date: ?\DateTime,
 *   title: non-empty-string,
 *   text: ?non-empty-string,
 * }
 * @phpstan-type PageSearchResults array{
 *   title: non-empty-string,
 *   bestScore: ?float,
 *   results: array<SearchResult>,
 * }
 */
class SearchUtils {
    use WithUtilsTrait;

    /** @var array<class-string<OlzRootComponent<mixed>>> */
    protected static array $all_page_classes = [
        // All classes that extend `OlzRootComponent` should be listed here:
        OlzAnmelden::class,
        OlzCommands::class,
        OlzFiles::class,
        OlzLogs::class,
        OlzMembers::class,
        OlzMonitoring::class,
        OlzNewsletter::class,
        OlzOev::class,
        OlzPanini2024::class,
        OlzPanini2024All::class,
        OlzPanini2024Masks::class,
        OlzQuiz::class,
        OlzResults::class,
        OlzSearchEngines::class,
        OlzStatistics::class,
        OlzYoutube::class,
        OlzEmailReaktion::class,
        OlzHtmlSitemap::class,
        OlzDatenschutz::class,
        OlzFuerEinsteiger::class,
        OlzMaterial::class,
        OlzFaqDetail::class,
        OlzFaqList::class,
        OlzKarteDetail::class,
        OlzKarten::class,
        OlzNewsDetail::class,
        OlzNewsList::class,
        OlzRolePage::class,
        OlzVerein::class,
        OlzService::class,
        OlzStartseite::class,
        OlzSuche::class,
        OlzTerminDetail::class,
        OlzTermineList::class,
        OlzTerminLocationDetail::class,
        OlzTerminLocationsList::class,
        OlzTerminTemplateDetail::class,
        OlzTerminTemplatesList::class,
        OlzUserDetail::class,
    ];

    /**
     * Runs the search on every class in `$all_page_classes` and returns one
     * entry per page class, ordered by descending `bestScore`.
     *
     * @param array<string> $terms
     *
     * @return array<PageSearchResults>
     */
    public function getSearchResults(array $terms): array {
        $results = [];
        foreach (self::$all_page_classes as $page_class) {
            $results[] = $this->getPageSearchResults($page_class, $terms);
        }
        usort($results, fn ($a, $b) => $b['bestScore'] <=> $a['bestScore']);
        return $results;
    }

    /**
     * Instantiates a single page class, collects its search results sorted by
     * descending score, and reports the top score (null if there are none).
     *
     * @param class-string<OlzRootComponent<array<string, mixed>>> $page_class
     * @param array<string>                                        $terms
     *
     * @return PageSearchResults
     */
    protected function getPageSearchResults(string $page_class, array $terms): array {
        $page = new $page_class();
        $results = $page->getSearchResults($terms);
        usort($results, fn ($a, $b) => $b['score'] <=> $a['score']);
        $first_result = $results[0] ?? null;
        $best_score = $first_result['score'] ?? null;
        return [
            'title' => $page->getSearchTitle(),
            'bestScore' => $best_score,
            'results' => $results,
        ];
    }

    /**
     * Builds criteria restricting `$field` to the date/time range parsed from
     * `$term`: `start <= field < end`. Returns an empty list when
     * `parseDateTimeRange` yields null for the term.
     *
     * @return array<Expression>
     */
    public function getDateCriteria(string $field, string $term): array {
        $result = $this->dateUtils()->parseDateTimeRange($term);
        if ($result === null) {
            return [];
        }
        return [Criteria::expr()->andX(
            Criteria::expr()->gte($field, $result['start']),
            Criteria::expr()->lt($field, $result['end']),
        )];
    }

    /**
     * Searches a fixed piece of content (plus the title from `$defaults`).
     * Returns a single result if every term occurs, otherwise an empty list.
     *
     * @param array<string> $terms
     * @param array{
     *   link: non-empty-string,
     *   icon?: ?non-empty-string,
     *   date?: ?\DateTime,
     *   title: non-empty-string,
     * } $defaults
     *
     * @return array<SearchResult>
     */
    public function getStaticSearchResults(
        string $content,
        array $terms,
        array $defaults,
    ): array {
        $search_space = "{$content} {$defaults['title']}";
        $analysis = $this->analyze($search_space, $defaults['date'] ?? null, $terms);
        if (!$analysis['hasAll']) {
            return [];
        }
        return [
            [
                'score' => $analysis['score'],
                'icon' => null,
                'date' => null,
                'text' => $this->searchUtils()->getCutout($content, $terms) ?: null,
                // Spread last, so values given in `$defaults` win over the nulls above.
                ...$defaults,
            ],
        ];
    }

    /**
     * Adds a `score` to a result and replaces its `text` by a cut-out around
     * the first matching term (null if the cut-out is empty).
     *
     * @param array{
     *   link: non-empty-string,
     *   icon?: ?non-empty-string,
     *   date?: ?\DateTime,
     *   title: non-empty-string,
     *   text?: ?non-empty-string,
     * } $result
     * @param array<string> $terms
     *
     * @return SearchResult
     */
    public function getScoredSearchResult(
        array $result,
        array $terms,
    ): array {
        $text_str = $result['text'] ?? '';
        $search_space = "{$text_str} {$result['title']}";
        $analysis = $this->analyze($search_space, $result['date'] ?? null, $terms);
        return [
            'icon' => null,
            'date' => null,
            // Values from `$result` override the null defaults above...
            ...$result,
            // ...while score and text are always recomputed here.
            'score' => $analysis['score'],
            'text' => $this->searchUtils()->getCutout($text_str, $terms) ?: null,
        ];
    }

    /**
     * Returns the textual representations of a date that search terms are
     * matched against (`Y-m-d`, `d.m.Y`, `j.n.Y`); empty for a null date.
     *
     * @return array<string>
     */
    public function getDateFormattings(?\DateTime $date): array {
        if ($date === null) {
            return [];
        }
        return [
            $date->format('Y-m-d'),
            $date->format('d.m.Y'),
            $date->format('j.n.Y'),
        ];
    }

    /**
     * Counts case-insensitive occurrences of each term in `$content` (plus one
     * if the term occurs in a formatting of `$date`) and derives a score.
     *
     * The score is `1 - 1 / (avg + 1)`, where `avg` is the average number of
     * occurrences per term; it is rounded to 5 decimals and lies in [0, 1).
     * `hasAll` is true iff every term occurred at least once. For an empty
     * term list the result is score 0.0 with `hasAll` true.
     *
     * @param array<string> $terms
     *
     * @return array{score: float, hasAll: bool}
     */
    public function analyze(string $content, ?\DateTime $date, array $terms): array {
        if ($terms === []) {
            // Without terms there is nothing to average over; bail out before
            // dividing by `count($terms)`.
            return ['score' => 0.0, 'hasAll' => true];
        }
        $date_formattings = implode(' ', $this->getDateFormattings($date));
        $has_all = true;
        $sum_occurrences = 0;
        foreach ($terms as $term) {
            // The delimiter must be quoted too, otherwise a term containing
            // "/" produces an invalid pattern.
            $pattern = '/'.preg_quote($term, '/').'/i';
            // preg_match_all returns false on failure; count that as no match.
            $num_occurrences = (int) preg_match_all($pattern, $content);
            if (preg_match($pattern, $date_formattings) === 1) {
                $num_occurrences++;
            }
            $sum_occurrences += $num_occurrences;
            if ($num_occurrences === 0) {
                $has_all = false;
            }
        }
        $score = round(1 - (1 / ($sum_occurrences / count($terms) + 1)), 5);
        return ['score' => $score, 'hasAll' => $has_all];
    }

    /**
     * Returns a window of at most 80 bytes of `$text`, positioned so that it
     * starts 40 bytes before the first search term found at an offset > 0
     * (terms are tried in order, compared in lowercase). If no such term is
     * found, the window starts at the beginning of the text.
     *
     * "..." is prepended/appended only where text was actually cut off.
     * Offsets and lengths are in bytes (`strpos`/`substr`).
     *
     * @param array<string> $search_terms
     */
    public function getCutout(string $text, array $search_terms): string {
        $length_a = 40; // bytes kept before the match position
        $length_b = 40; // bytes kept from the match position onwards

        $lowercase_text = strtolower($text);
        $start = 0;
        foreach ($search_terms as $search_term) {
            $position = strpos($lowercase_text, strtolower($search_term));
            if ($position !== false && $position > 0) {
                $start = $position;
                break;
            }
        }
        $offset = max(0, $start - $length_a);
        $window = $length_a + $length_b;
        $prefix = $offset > 0 ? '...' : '';
        $suffix = ($offset + $window) < strlen($text) ? '...' : '';
        return $prefix.substr($text, $offset, $window).$suffix;
    }

    /**
     * Wraps every case-insensitive occurrence of a search term in
     * `<span class="highlight">…</span>`.
     *
     * Works in two passes: matches are first marked with temporary `\[` / `\]`
     * tokens, which are then replaced by the HTML tags. The text and terms are
     * passed through `generalUtils()->escape()` before and `unescape()` after,
     * presumably so that token-like sequences already present in the input are
     * not mistaken for markers — confirm against GeneralUtils.
     *
     * NOTE(review): no HTML-escaping of `$text` happens here; presumably the
     * caller is responsible for that — confirm.
     *
     * @param array<string> $search_terms
     */
    public function highlight(string $text, array $search_terms): string {
        $start_token = '\[';
        $end_token = '\]';
        $tokens = [$start_token, $end_token];
        $text = $this->generalUtils()->escape($text, $tokens);
        foreach ($search_terms as $term) {
            $esc_term = preg_quote($this->generalUtils()->escape($term, $tokens), '/');
            // Only match terms that are not directly preceded by a backslash.
            $text = preg_replace(
                "/(?<!\\\\)({$esc_term})/i",
                "{$start_token}\\1{$end_token}",
                $text ?? '',
            );
        }
        $start_tag = '<span class="highlight">';
        $end_tag = '</span>';
        $esc_start_token = preg_quote($start_token, '/');
        $esc_end_token = preg_quote($end_token, '/');
        // Swap the temporary tokens (not preceded by a backslash) for the tags.
        $text = preg_replace(
            ["/(?<!\\\\){$esc_start_token}/", "/(?<!\\\\){$esc_end_token}/"],
            [$start_tag, $end_tag],
            $text ?? '',
        );
        return $this->generalUtils()->unescape($text ?? '', $tokens);
    }

    /** Creates a new instance; takes no configuration. */
    public static function fromEnv(): self {
        return new self();
    }
}