Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
98.31% |
116 / 118 |
|
80.00% |
8 / 10 |
CRAP | |
0.00% |
0 / 1 |
| SyncSolvAssignPeopleCommand | |
98.31% |
116 / 118 |
|
80.00% |
8 / 10 |
28 | |
0.00% |
0 / 1 |
| getAllowedAppEnvs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| handle | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| assignSolvPeople | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
| findOrCreateSolvPerson | |
100.00% |
34 / 34 |
|
100.00% |
1 / 1 |
3 | |||
| getMatchingPerson | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
4 | |||
| getUnambiguousPerson | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
5.03 | |||
| getClosestMatchesOfPersonInfo | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
4 | |||
| getDifferenceBetweenPersonInfo | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| occasionallyFlush | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| forceFlush | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace Olz\Command; |
| 4 | |
| 5 | use Olz\Command\Common\OlzCommand; |
| 6 | use Olz\Entity\SolvPerson; |
| 7 | use Olz\Entity\SolvResult; |
| 8 | use Symfony\Component\Console\Attribute\AsCommand; |
| 9 | use Symfony\Component\Console\Command\Command; |
| 10 | use Symfony\Component\Console\Input\InputInterface; |
| 11 | use Symfony\Component\Console\Output\OutputInterface; |
| 12 | |
#[AsCommand(name: 'olz:sync-solv-assign-people')]
/**
 * Console command that assigns a person ID to every SolvResult that does not
 * have one yet, fuzzy-matching against already assigned results and creating
 * a new SolvPerson when no sufficiently close, unambiguous match exists.
 */
class SyncSolvAssignPeopleCommand extends OlzCommand {
    /** Number of updates registered since the last flush. */
    protected int $num_updates = 0;

    /** Number of registered updates after which `occasionallyFlush` flushes. */
    protected int $flush_every = 1000;

    /** @return array<string> */
    protected function getAllowedAppEnvs(): array {
        return ['dev', 'test', 'staging', 'prod'];
    }

    /** Runs the assignment and reports success. */
    protected function handle(InputInterface $input, OutputInterface $output): int {
        $this->assignSolvPeople();
        return Command::SUCCESS;
    }

    /**
     * Assigns a person to every unassigned SolvResult.
     *
     * A person ID of 0 returned by `getExactPersonId` is treated as "no exact
     * match"; in that case a fuzzy match is attempted and, failing that, a new
     * SolvPerson is created.
     */
    public function assignSolvPeople(): void {
        $solv_result_repo = $this->entityManager()->getRepository(SolvResult::class);
        foreach ($solv_result_repo->getUnassignedSolvResults() as $solv_result) {
            $person = $solv_result_repo->getExactPersonId($solv_result);
            if ($person == 0) {
                $this->logAndOutput("\n---\n");
                $this->logAndOutput("Person not exactly matched: {$solv_result}");
                $person = $this->findOrCreateSolvPerson($solv_result);
            }
            if ($person != 0) {
                $solv_result->setPerson($person);
                $this->occasionallyFlush();
            }
        }
        // Write whatever is left over from the last (partial) batch.
        $this->forceFlush();
    }

    /**
     * Returns the ID of the person fuzzy-matching the given result, or creates
     * (and immediately flushes) a new SolvPerson and returns its ID.
     */
    private function findOrCreateSolvPerson(SolvResult $solv_result): int {
        $solv_result_repo = $this->entityManager()->getRepository(SolvResult::class);
        [$solv_result_data, $msg] = $this->generalUtils()->measureLatency(
            fn () => $solv_result_repo->getAllAssignedSolvResultPersonData(),
        );
        $this->logAndOutput("getAllAssignedSolvResultPersonData {$msg}");

        [$person_id, $msg] = $this->generalUtils()->measureLatency(
            fn () => $this->getMatchingPerson(
                $solv_result->getName(),
                $solv_result->getBirthYear(),
                $solv_result->getDomicile(),
                $solv_result_data,
            ),
        );
        $this->logAndOutput("getMatchingPerson {$msg}");

        if ($person_id !== null) {
            return $person_id;
        }

        $solv_person = new SolvPerson();
        $solv_person->setSameAs(null);
        $solv_person->setName($solv_result->getName());
        $solv_person->setBirthYear($solv_result->getBirthYear());
        $solv_person->setDomicile($solv_result->getDomicile());
        $solv_person->setMember(1);
        $this->entityManager()->persist($solv_person);
        // This is necessary, s.t. getExactPersonId works correctly for the next iteration.
        $this->forceFlush();
        $insert_id = $solv_person->getId();
        $this->generalUtils()->checkNotNull($insert_id, "No SolvPerson ID");

        $person_str = json_encode($solv_person, JSON_PRETTY_PRINT) ?: '';
        $this->logAndOutput("Created new person (id {$insert_id}):");
        $this->logAndOutput($person_str);
        return $insert_id;
    }

    /**
     * Returns the person ID shared by all closest matches, or null if the
     * closest matches differ by 3 or more, or refer to different persons.
     *
     * @param array<array{person: int, name: string, birth_year: string, domicile: string}> $person_infos
     */
    public function getMatchingPerson(
        string $name,
        string $birth_year,
        string $domicile,
        array $person_infos
    ): ?int {
        $closest_matches = $this->getClosestMatchesOfPersonInfo(
            $name,
            $birth_year,
            $domicile,
            $person_infos,
        );
        $least_difference = $closest_matches['difference'];
        $person_infos_with_least_difference = $closest_matches['matches'];
        // json_encode yields false on failure; log an empty string in that case
        // (same text as interpolating false, but explicit).
        $pretty_matches = json_encode($person_infos_with_least_difference, JSON_PRETTY_PRINT) ?: '';
        $this->logAndOutput("Closest matches (difference {$least_difference}): {$pretty_matches}");
        if ($least_difference >= 3) {
            $this->logAndOutput("  => No matching person found (difference too high).");
            if ($least_difference < 6) {
                // Differences of 3 to 5 are rejected, but flagged for review.
                $this->logAndOutput("Unclear case. Maybe update logic?", level: 'notice');
            }
            return null;
        }
        $unambiguous_person = $this->getUnambiguousPerson($person_infos_with_least_difference);
        if ($unambiguous_person === null) {
            $this->logAndOutput("  => No matching person found (closest matches contain different persons).");
            return null;
        }
        $this->logAndOutput("  => Matching person found: {$unambiguous_person}.");
        return $unambiguous_person;
    }

    /**
     * Returns the person ID if every entry refers to the same person.
     *
     * Returns null for an empty array, when the first entry has no `person`,
     * or when any entry's person (a missing one is compared as 0) differs from
     * the first entry's.
     *
     * @param array<array{person?: int, name?: string, birth_year?: string, domicile?: string}> $person_infos
     */
    public function getUnambiguousPerson(array $person_infos): ?int {
        // Look up the first entry by its actual key, so arrays that are not
        // 0-indexed lists (e.g. after filtering) are handled as well.
        $first_key = array_key_first($person_infos);
        if ($first_key === null) {
            return null;
        }
        $person_id = $person_infos[$first_key]['person'] ?? null;
        if ($person_id === null) {
            return null;
        }
        $suggested_person_id = intval($person_id);
        foreach ($person_infos as $person_info) {
            if (intval($person_info['person'] ?? null) != $suggested_person_id) {
                return null; // there is no unambiguous person
            }
        }
        return $suggested_person_id;
    }

    /**
     * Finds the entries with the smallest difference to the given person data.
     *
     * The search starts with the byte length of `$name` as the upper bound, so
     * entries differing by more than that are never reported; if no entry is
     * within the bound, `matches` is empty and `difference` is that bound.
     *
     * @param array<array{person?: int, name: string, birth_year: string, domicile: string}> $person_infos
     *
     * @return array{difference: int, matches: array<array{person?: int, name: string, birth_year: string, domicile: string}>}
     */
    public function getClosestMatchesOfPersonInfo(
        string $name,
        string $birth_year,
        string $domicile,
        array $person_infos
    ): array {
        $least_difference = strlen($name);
        $person_infos_with_least_difference = [];
        foreach ($person_infos as $row) {
            $difference = $this->getDifferenceBetweenPersonInfo(
                $name,
                $birth_year,
                $domicile,
                $row['name'],
                $row['birth_year'],
                $row['domicile'],
            );
            if ($difference < $least_difference) {
                // Strictly better: restart the list of candidates.
                $least_difference = $difference;
                $person_infos_with_least_difference = [$row];
            } elseif ($difference == $least_difference) {
                $person_infos_with_least_difference[] = $row;
            }
        }
        return [
            'difference' => $least_difference,
            'matches' => $person_infos_with_least_difference,
        ];
    }

    /**
     * Returns the sum of the edit distances between the names, the birth years
     * (normalized through intval) and the trimmed domiciles of two persons.
     *
     * If either domicile is empty, its contribution is capped at 2.
     *
     * NOTE(review): levenshtein() compares bytes, so a differing multi-byte
     * UTF-8 character (e.g. an umlaut) can count as more than 1. The thresholds
     * in getMatchingPerson rely on this metric, so it is left unchanged here.
     */
    public function getDifferenceBetweenPersonInfo(
        string $name_1,
        string $birth_year_1,
        string $domicile_1,
        string $name_2,
        string $birth_year_2,
        string $domicile_2
    ): int {
        $name_difference = levenshtein($name_1, $name_2);
        $birth_year_difference = levenshtein(
            (string) intval($birth_year_1),
            (string) intval($birth_year_2),
        );
        $trim_domicile_1 = trim($domicile_1);
        $trim_domicile_2 = trim($domicile_2);
        $domicile_difference = levenshtein($trim_domicile_1, $trim_domicile_2);
        if ($trim_domicile_1 === '' || $trim_domicile_2 === '') {
            $domicile_difference = min($domicile_difference, 2);
        }
        return $name_difference + $birth_year_difference + $domicile_difference;
    }

    /** Registers one update and flushes once `flush_every` updates are pending. */
    protected function occasionallyFlush(): void {
        $this->num_updates++;
        // `>=` so that a flush happens on every `flush_every`-th update
        // (with `>` the first flush only happened one update later).
        if ($this->num_updates >= $this->flush_every) {
            $this->forceFlush();
        }
    }

    /** Flushes the entity manager, resets the update counter and logs the latency. */
    protected function forceFlush(): void {
        [, $msg] = $this->generalUtils()->measureLatency(function () {
            $this->entityManager()->flush();
            $this->num_updates = 0;
        });
        $this->logAndOutput("forceFlush {$msg}");
    }
}