Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.31% |
116 / 118 |
|
80.00% |
8 / 10 |
CRAP | |
0.00% |
0 / 1 |
SyncSolvAssignPeopleCommand | |
98.31% |
116 / 118 |
|
80.00% |
8 / 10 |
28 | |
0.00% |
0 / 1 |
getAllowedAppEnvs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
handle | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
assignSolvPeople | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
findOrCreateSolvPerson | |
100.00% |
34 / 34 |
|
100.00% |
1 / 1 |
3 | |||
getMatchingPerson | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
4 | |||
getUnambiguousPerson | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
5.03 | |||
getClosestMatchesOfPersonInfo | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
4 | |||
getDifferenceBetweenPersonInfo | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
occasionallyFlush | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
forceFlush | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace Olz\Command; |
4 | |
5 | use Olz\Command\Common\OlzCommand; |
6 | use Olz\Entity\SolvPerson; |
7 | use Olz\Entity\SolvResult; |
8 | use Symfony\Component\Console\Attribute\AsCommand; |
9 | use Symfony\Component\Console\Command\Command; |
10 | use Symfony\Component\Console\Input\InputInterface; |
11 | use Symfony\Component\Console\Output\OutputInterface; |
12 | |
13 | #[AsCommand(name: 'olz:sync-solv-assign-people')] |
14 | class SyncSolvAssignPeopleCommand extends OlzCommand { |
/**
 * Names the app environments this command reports as allowed.
 *
 * How the list is enforced is decided by the caller of this hook, not here.
 *
 * @return array<string>
 */
protected function getAllowedAppEnvs(): array {
    $allowed_app_envs = ['dev', 'test', 'staging', 'prod'];
    return $allowed_app_envs;
}
19 | |
/**
 * Runs the person assignment and reports success.
 *
 * Neither $input nor $output is read by this method.
 *
 * @return int Command::SUCCESS whenever assignSolvPeople() returns normally
 */
protected function handle(InputInterface $input, OutputInterface $output): int {
    $exit_code = Command::SUCCESS;
    $this->assignSolvPeople();
    return $exit_code;
}
24 | |
/**
 * Assigns a person ID to every SolvResult the repository reports as unassigned.
 *
 * For each result an exact match is tried first; if that yields 0, a fuzzy
 * match (or a newly created person) is obtained from findOrCreateSolvPerson().
 * Results whose person ID is still 0 afterwards are left untouched.
 * Pending changes are flushed occasionally during the loop and once at the end.
 */
public function assignSolvPeople(): void {
    $repo = $this->entityManager()->getRepository(SolvResult::class);
    foreach ($repo->getUnassignedSolvResults() as $unassigned_result) {
        $person_id = $repo->getExactPersonId($unassigned_result);
        if ($person_id == 0) {
            // No exact match: fall back to fuzzy matching / person creation.
            $this->logAndOutput("\n---\n");
            $this->logAndOutput("Person not exactly matched: {$unassigned_result}");
            $person_id = $this->findOrCreateSolvPerson($unassigned_result);
        }
        if ($person_id == 0) {
            // Still nobody to assign; leave this result as it is.
            continue;
        }
        $unassigned_result->setPerson($person_id);
        $this->occasionallyFlush();
    }
    // Persist whatever has not been flushed by occasionallyFlush() yet.
    $this->forceFlush();
}
42 | |
/**
 * Returns the ID of the person a result belongs to, creating the person if needed.
 *
 * The result's name, birth year and domicile are compared against the person
 * data of all already assigned results. If getMatchingPerson() yields an ID,
 * that ID is returned. Otherwise a new SolvPerson is persisted, flushed
 * immediately, and its freshly generated ID is returned.
 * The latency of both lookups is logged.
 */
private function findOrCreateSolvPerson(SolvResult $solv_result): int {
    $repo = $this->entityManager()->getRepository(SolvResult::class);

    [$assigned_person_data, $latency_msg] = $this->generalUtils()->measureLatency(
        fn () => $repo->getAllAssignedSolvResultPersonData()
    );
    $this->logAndOutput("getAllAssignedSolvResultPersonData {$latency_msg}");

    [$matched_person_id, $latency_msg] = $this->generalUtils()->measureLatency(
        fn () => $this->getMatchingPerson(
            $solv_result->getName(),
            $solv_result->getBirthYear(),
            $solv_result->getDomicile(),
            $assigned_person_data
        )
    );
    $this->logAndOutput("getMatchingPerson {$latency_msg}");

    if ($matched_person_id !== null) {
        return $matched_person_id;
    }

    // Nobody matched: register the result's identity as a new person.
    $new_person = new SolvPerson();
    $new_person->setSameAs(null);
    $new_person->setName($solv_result->getName());
    $new_person->setBirthYear($solv_result->getBirthYear());
    $new_person->setDomicile($solv_result->getDomicile());
    $new_person->setMember(1);
    $this->entityManager()->persist($new_person);
    // Flush right away so that getExactPersonId can find this person on the
    // caller's next iteration.
    $this->forceFlush();
    $new_person_id = $new_person->getId();
    $this->generalUtils()->checkNotNull($new_person_id, "No SolvPerson ID");

    // json_encode() yields false on failure; log an empty string in that case.
    $new_person_json = json_encode($new_person, JSON_PRETTY_PRINT) ?: '';
    $this->logAndOutput("Created new person (id {$new_person_id}):");
    $this->logAndOutput($new_person_json);
    return $new_person_id;
}
84 | |
/**
 * Fuzzy-matches a name / birth year / domicile triple against known person data.
 *
 * A person ID is returned only if the closest candidates have a difference
 * below 3 and all of them refer to the same person. Every decision is logged;
 * differences of 3 to 5 are additionally logged at 'notice' level as unclear.
 *
 * @param array<array{person: int, name: string, birth_year: string, domicile: string}> $person_infos
 *
 * @return ?int the matching person ID, or null if there is no acceptable match
 */
public function getMatchingPerson(
    string $name,
    string $birth_year,
    string $domicile,
    array $person_infos
): ?int {
    ['difference' => $smallest_difference, 'matches' => $closest_infos] =
        $this->getClosestMatchesOfPersonInfo($name, $birth_year, $domicile, $person_infos);

    $closest_infos_json = json_encode($closest_infos, JSON_PRETTY_PRINT);
    $this->logAndOutput("Closest matches (difference {$smallest_difference}): {$closest_infos_json}");

    if ($smallest_difference >= 3) {
        $this->logAndOutput(" => No matching person found (difference too high).");
        if ($smallest_difference < 6) {
            // Close to the threshold: flag it so the heuristic can be reviewed.
            $this->logAndOutput("Unclear case. Maybe update logic?", level: 'notice');
        }
        return null;
    }

    $matched_person_id = $this->getUnambiguousPerson($closest_infos);
    if ($matched_person_id !== null) {
        $this->logAndOutput(" => Matching person found: {$matched_person_id}.");
        return $matched_person_id;
    }
    $this->logAndOutput(" => No matching person found (closest matches contain different persons).");
    return null;
}
117 | |
/**
 * Returns the person ID shared by all given rows, if there is exactly one.
 *
 * The ID of the row at index 0 is the candidate. Null is returned when the
 * list is empty, when that row has no (or a null) 'person' entry, or when any
 * row carries a different ID. A row without a 'person' entry is compared as
 * ID 0.
 *
 * @param array<array{person?: int, name?: string, birth_year?: string, domicile?: string}> $person_infos
 */
public function getUnambiguousPerson(array $person_infos): ?int {
    if ($person_infos === []) {
        return null;
    }
    $first_person = $person_infos[0]['person'] ?? null;
    if ($first_person === null) {
        return null;
    }
    $candidate_id = intval($first_person);
    foreach ($person_infos as $info) {
        $info_person_id = intval($info['person'] ?? null);
        if ($info_person_id !== $candidate_id) {
            // At least two different persons are equally close.
            return null;
        }
    }
    return $candidate_id;
}
135 | |
/**
 * Finds the rows that differ least from the given name / birth year / domicile.
 *
 * The search starts from a difference equal to the byte length of $name, so
 * rows that differ by more than that are never reported. If no row qualifies,
 * 'matches' is empty and 'difference' is that starting value.
 *
 * @param array<array{person?: int, name: string, birth_year: string, domicile: string}> $person_infos
 *
 * @return array{difference: int, matches: array<array{person?: int, name: string, birth_year: string, domicile: string}>}
 */
public function getClosestMatchesOfPersonInfo(
    string $name,
    string $birth_year,
    string $domicile,
    array $person_infos
): array {
    $best_difference = strlen($name);
    $best_rows = [];
    foreach ($person_infos as $candidate) {
        $candidate_difference = $this->getDifferenceBetweenPersonInfo(
            $name,
            $birth_year,
            $domicile,
            $candidate['name'],
            $candidate['birth_year'],
            $candidate['domicile']
        );
        if ($candidate_difference > $best_difference) {
            continue;
        }
        if ($candidate_difference < $best_difference) {
            // Strictly better: forget the rows collected so far.
            $best_difference = $candidate_difference;
            $best_rows = [];
        }
        $best_rows[] = $candidate;
    }
    return [
        'difference' => $best_difference,
        'matches' => $best_rows,
    ];
}
170 | |
/**
 * Computes how much two person descriptions differ.
 *
 * The result is the sum of three Levenshtein distances: between the names,
 * between the birth years (each normalised through intval() first), and
 * between the trimmed domiciles. If either trimmed domicile is empty, the
 * domicile part is capped at 2.
 * PHP's levenshtein() works on bytes, so a multibyte character can count for
 * more than one edit.
 *
 * @return int the summed difference; 0 means the descriptions are equivalent
 */
public function getDifferenceBetweenPersonInfo(
    string $name_1,
    string $birth_year_1,
    string $domicile_1,
    string $name_2,
    string $birth_year_2,
    string $domicile_2
): int {
    $name_distance = levenshtein($name_1, $name_2);

    $year_distance = levenshtein(
        (string) intval($birth_year_1),
        (string) intval($birth_year_2)
    );

    $clean_domicile_1 = trim($domicile_1);
    $clean_domicile_2 = trim($domicile_2);
    $raw_domicile_distance = levenshtein($clean_domicile_1, $clean_domicile_2);
    $domicile_missing = $clean_domicile_1 === '' || $clean_domicile_2 === '';
    // A missing domicile should not weigh as much as a truly different one.
    $domicile_distance = $domicile_missing
        ? min($raw_domicile_distance, 2)
        : $raw_domicile_distance;

    return $name_distance + $year_distance + $domicile_distance;
}
191 | |
/** Number of updates counted by occasionallyFlush() since the last forceFlush(). */
protected int $num_updates = 0;

/** Number of counted updates after which occasionallyFlush() triggers a flush. */
protected int $flush_every = 1000;

/**
 * Counts one update and flushes once $flush_every updates have accumulated.
 *
 * The comparison is `>=` so that the flush happens on exactly the
 * $flush_every-th update; with `>` it would only happen one update later.
 * forceFlush() resets the counter.
 */
protected function occasionallyFlush(): void {
    $this->num_updates++;
    if ($this->num_updates >= $this->flush_every) {
        $this->forceFlush();
    }
}
201 | |
/**
 * Flushes the entity manager, resets the update counter and logs the latency.
 */
protected function forceFlush(): void {
    $flush_and_reset = function () {
        $this->entityManager()->flush();
        $this->num_updates = 0;
    };
    $measurement = $this->generalUtils()->measureLatency($flush_and_reset);
    // Only the latency message (second element) is of interest here.
    $latency_msg = $measurement[1];
    $this->logAndOutput("forceFlush {$latency_msg}");
}
209 | } |