Remove duplicate entries, match ward names case-insensitively, and improve the override system.

This commit is contained in:
Kim Taylor
2024-10-03 23:01:50 +10:00
parent 6eef04e89f
commit abf5147c79
2 changed files with 26 additions and 6 deletions

View File

@@ -61,6 +61,8 @@ foreach ($lga_list as $lga) {
if (count($lga_candidates) === 0) continue;
remove_duplicates($lga_candidates);
$dir = dirname($lga['config-file']);
$dir_files = scandir($dir);
$output_file = $dir."/candidates-generic.csv";
@@ -118,11 +120,10 @@ foreach ($lga_list as $lga) {
foreach ($overrides as $override) {
foreach ($lines as $line_key => $line) {
foreach ($header as $index => $field) {
if (($override['Field'] === $field) &&
($line[$index] === $override['Old'])) {
$lines[$line_key][$index] = $override['New'];
}
$match_index = array_search($override['Match Field'], $header);
$replace_index = array_search($override['Replace Field'], $header);
if ($line[$match_index] === $override['Match Value']) {
$lines[$line_key][$replace_index] = $override['Replace Value'];
}
}
}

View File

@@ -75,7 +75,7 @@ function match_lga(&$candidate_data, $lga_list) {
$max_score = 0;
foreach ($match_lga['wardNames'] as $ward) {
similar_text($ward, $candidate['Ward'], $score);
similar_text(strtolower($ward), strtolower($candidate['Ward']), $score);
if ($score >= $max_score) {
$max_score = $score;
$match_ward = $ward;
@@ -86,3 +86,22 @@ function match_lga(&$candidate_data, $lga_list) {
$candidate['match_ward'] = $match_ward;
}
}
/**
 * Drop earlier candidates whose name is a near-duplicate of a later one.
 *
 * Names are compared case-insensitively with similar_text(); when the
 * similarity percentage is above 90, the EARLIER entry is discarded and the
 * later one is kept. The array is modified in place and surviving entries
 * keep their original keys (no re-indexing).
 *
 * @param array $candidate_data Candidate rows, each read via its 'Name' key;
 *                              passed by reference and pruned in place.
 * @return void
 */
function remove_duplicates(&$candidate_data) {
    /* key => name of every entry visited so far */
    $seen_names = [];
    /* key => true for every earlier entry superseded by a later match;
       using keys as a set means each key is recorded at most once */
    $superseded = [];

    foreach ($candidate_data as $current_key => $current) {
        /* Compare against ALL earlier names, including ones already marked,
           so a chain of similar names collapses onto the last occurrence */
        foreach ($seen_names as $earlier_key => $earlier_name) {
            similar_text(strtolower($earlier_name), strtolower($current['Name']), $percent);
            if ($percent > 90) {
                $superseded[$earlier_key] = true;
            }
        }
        $seen_names[$current_key] = $current['Name'];
    }

    /* Removal is deferred until after the scan so the iteration above is
       never affected by the deletions */
    foreach ($superseded as $stale_key => $_unused) {
        unset($candidate_data[$stale_key]);
    }
}