Generate candidates-generic.csv files based on fuzzy match with LGA/Ward.

This commit is contained in:
Kim Taylor
2024-09-22 19:46:39 +10:00
parent 2f5806a227
commit 219b242503
3 changed files with 131 additions and 6 deletions

View File

@@ -0,0 +1,20 @@
<?php
require_once("parse_generic_csv.php");
/*
 * Print the data parsed from a generic CSV file as JSON on stdout.
 *
 * Options:
 *   --generic-csv <path>   CSV file handed to parse_generic_csv() (required).
 *
 * Exit status: 0 on success, 1 when the option is missing or the parsed
 * data cannot be encoded as JSON.
 */
$options = getopt("", ["generic-csv:"]);

if (isset($options['generic-csv'])) {
    $generic_csv = $options['generic-csv'];
} else {
    error_log("Error: Missing required option '--generic-csv'.");
    exit(1);
}

$candidate_data = parse_generic_csv($generic_csv);

/* json_encode() returns FALSE on failure (for example on malformed UTF-8).
   Report that instead of printing nothing and exiting successfully. */
$json_data = json_encode($candidate_data);
if ($json_data === FALSE) {
    error_log("Error encoding candidate data as JSON: " . json_last_error_msg());
    exit(1);
}

echo $json_data;
exit(0);

View File

@@ -2,7 +2,7 @@
require_once("parse_generic_csv.php");
$options = getopt("", ["generic-csv:"]);
$options = getopt("", ["generic-csv:", "config-files:"]);
if (isset($options['generic-csv'])) {
$generic_csv = $options['generic-csv'];
@@ -11,8 +11,30 @@ if (isset($options['generic-csv'])) {
exit(1);
}
/*
 * Read the required --config-files option and turn it into a list of
 * config file paths. Paths are separated by spaces within one value; the
 * option may also be given more than once.
 */
if (isset($options['config-files'])) {
    $config_files = $options['config-files'];
} else {
    error_log("Error: Missing required option '--config-files'.");
    exit(1);
}

/* getopt() returns an array when an option is repeated on the command
   line, so normalise to a list of raw values before splitting. */
$config_file_args = is_array($config_files) ? $config_files : [$config_files];

$config_files = [];
foreach ($config_file_args as $config_file_arg) {
    /* PREG_SPLIT_NO_EMPTY drops the empty names that leading, trailing or
       doubled spaces would otherwise produce. */
    $config_file_parts = preg_split('/ +/', $config_file_arg, -1, PREG_SPLIT_NO_EMPTY);
    $config_files = array_merge($config_files, $config_file_parts);
}

if (count($config_files) === 0) {
    error_log("Error: Option '--config-files' does not name any files.");
    exit(1);
}
$candidate_data = parse_generic_csv($generic_csv);

/*
 * Generate dictionary of LGAs and Wards.
 *
 * Each config file is decoded from JSON into an associative array, tagged
 * with the path it was loaded from under the 'config-file' key, and
 * appended to $lga_list. Any unreadable or undecodable file aborts the
 * script with exit status 1.
 */
$lga_list = [];
foreach ($config_files as $config_file) {
    $config_string = file_get_contents($config_file);
    if ($config_string === FALSE) {
        error_log("Error opening config file '" . $config_file . "'.");
        exit(1);
    }

    /* json_decode() yields NULL on malformed JSON and a scalar for scalar
       documents. Neither can carry the keys read from these entries later,
       so reject them here rather than storing a broken entry. */
    $config = json_decode($config_string, true);
    if (!is_array($config)) {
        error_log("Error: config file '" . $config_file . "' does not contain a JSON object (" . json_last_error_msg() . ").");
        exit(1);
    }

    $config['config-file'] = $config_file;
    $lga_list[] = $config;
}
/* Calculate score for candidate */
foreach ($candidate_data as &$candidate) {
$score = 0;
@@ -26,6 +48,81 @@ foreach ($candidate_data as &$candidate) {
$candidate['Score'] = $score;
}
print_r($candidate_data);

/*
 * Match user typed LGA/Ward to our database.
 *
 * For every candidate, the LGA text is lower-cased and split into tokens on
 * non-letters, then compared token-by-token against each LGA slug with
 * similar_text(). A token pair scoring above 70% adds its percentage to the
 * LGA's total; any other pair subtracts 10. The LGA with the highest
 * positive total wins, and the ward of that LGA most similar to the
 * candidate's Ward text is chosen. Results are stored on the candidate as
 * 'match_lga' (slug) and 'match_ward'; both are NULL when no LGA matched.
 */
foreach ($candidate_data as &$candidate) {
    /* Reset per candidate so a failed match can never inherit the result
       found for the previous candidate. */
    $match_lga = null;
    $match_ward = null;

    /* The candidate's tokens do not depend on the LGA being tested, so
       split them once instead of once per LGA. */
    $aa = preg_split("/[^a-z]/", strtolower($candidate['LGA']));

    $max_score = 0;
    foreach ($lga_list as $lga) {
        $bb = preg_split("/[^a-z]/", $lga['slug']);
        $score_sum = 0;
        foreach ($aa as $a) {
            foreach ($bb as $b) {
                similar_text($a, $b, $score);
                if ($score > 70) {
                    $score_sum += $score;
                } else {
                    $score_sum -= 10;
                }
            }
        }
        if ($score_sum > $max_score) {
            $max_score = $score_sum;
            $match_lga = $lga;
        }
    }

    if ($match_lga === null) {
        error_log("Warning: no LGA match found for '" . $candidate['LGA'] . "'.");
        $candidate['match_lga'] = null;
        $candidate['match_ward'] = null;
        continue;
    }

    $max_score = 0;
    foreach ($match_lga['wardNames'] as $ward) {
        similar_text($ward, $candidate['Ward'], $score);
        if ($score > $max_score) {
            $max_score = $score;
            $match_ward = $ward;
        }
    }

    $candidate['match_lga'] = $match_lga['slug'];
    $candidate['match_ward'] = $match_ward;
}
/* Break the reference so later uses of $candidate cannot silently write
   into the last element of $candidate_data. */
unset($candidate);
/* Get picture */
foreach ($candidate_data as &$candidate) {
    /* Every candidate currently gets an empty picture field. */
    $candidate['match_picture'] = "";
}
/* Break the reference: $candidate is reused as a by-value loop variable
   further down, and assigning to a still-bound reference would overwrite
   the last element of $candidate_data. */
unset($candidate);
/* $candidate may still be bound by reference to the last element of
   $candidate_data from an earlier foreach. Drop that binding so the
   by-value loop below cannot overwrite that element. */
unset($candidate);

$header = ["Ward", "Candidate Name", "Rating", "Picture"];

/*
 * Generate candidates-generic.csv.
 *
 * For each LGA that has at least one matched candidate, write a
 * candidates-generic.csv next to that LGA's config file, containing the
 * header row followed by one row per candidate.
 * Exit status: 1 if an output file cannot be opened, 3 on a write or close
 * failure, 0 on success.
 */
foreach ($lga_list as $lga) {
    $lga_candidates = array_filter($candidate_data, function ($candidate) use ($lga) {
        return $candidate['match_lga'] === $lga['slug'];
    });
    if (count($lga_candidates) === 0) {
        continue;
    }

    $output_file = dirname($lga['config-file'])."/candidates-generic.csv";
    if (($handle = fopen($output_file, "w")) === FALSE) {
        error_log('Error opening output file');
        exit(1);
    }

    if (fputcsv($handle, $header) === FALSE) {
        error_log('Error writing headers to output file');
        exit(3);
    }

    foreach ($lga_candidates as $candidate) {
        $fields = [
            $candidate['match_ward'],
            $candidate['Name'],
            $candidate['Score'],
            $candidate['match_picture'],
        ];
        if (fputcsv($handle, $fields) === FALSE) {
            error_log('Error writing candidate to output file');
            exit(3);
        }
    }

    /* Close each file as soon as it is complete so buffered rows are
       flushed and handles do not pile up across LGAs. */
    if (fclose($handle) === FALSE) {
        error_log('Error closing output file');
        exit(3);
    }
}
exit(0);