Files
spl-tools/csv-generic/parse_generic_csv.php

108 lines
3.5 KiB
PHP

<?php
/**
 * Parse a generic candidate CSV export into a list of candidate records.
 *
 * The first row is the header row. Headers containing the long form-question
 * texts are shortened to "Photo", "LGA", "Ward" and "Party". Every column
 * after the "Pledge" column, up to (but not including) the "Verified" column,
 * is stored under sequential keys "q0", "q1", ... instead of its header text.
 *
 * Per-column transformations:
 *  - "Pledge" becomes "y" when the cell contains "I pledge", otherwise "n".
 *  - "Photo" keeps the raw cell under 'photo_url' and stores under 'Photo' the
 *    text following the last "id=" (null when the cell contains no "id=").
 *
 * Blank lines are skipped, and rows shorter than the header row get empty
 * strings for the missing cells.
 *
 * Logs an error and terminates the script with exit status 1 when the file
 * cannot be opened.
 *
 * @param string $generic_csv Path of the CSV file to read.
 * @return array List of associative arrays, one per non-blank data row.
 */
function parse_generic_csv($generic_csv) {
    $handle = fopen($generic_csv, "r");
    if ($handle === FALSE) {
        error_log('Error opening candidates file');
        exit(1);
    }

    $candidate_data = [];
    $headers = fgetcsv($handle);
    if (!is_array($headers)) {
        /* Empty file: no header row, therefore no candidates */
        fclose($handle);
        return $candidate_data;
    }

    while (($data = fgetcsv($handle)) !== FALSE) {
        /* fgetcsv() reports a blank line as an array holding a single null */
        if ($data === [null]) {
            continue;
        }

        $candidate = [];
        $question_no = 0;
        $is_question = false;
        foreach ($headers as $key => $value) {
            $value = (string) $value;
            /* Rows shorter than the header get empty strings for missing cells */
            $cell = isset($data[$key]) ? $data[$key] : '';

            /* The "Verified" column marks the end of the question columns */
            if ($value === "Verified") {
                $is_question = false;
            }

            /* Override key name for the long form-question headers */
            if (strpos($value, "candidate photo") !== false) $value = "Photo";
            if (strpos($value, "In which Local Government Area") !== false) $value = "LGA";
            if (strpos($value, "In which Ward") !== false) $value = "Ward";
            if (strpos($value, "Political Party") !== false) $value = "Party";

            if ($value === "Pledge") {
                $cell = (strpos($cell, "I pledge") !== false) ? "y" : "n";
            }
            if ($value === "Photo") {
                $candidate['photo_url'] = $cell;
                /* preg_filter() yields null when the cell has no "id=" part */
                $cell = preg_filter("/.*id=/", "", $cell);
            }

            if ($is_question) {
                $candidate['q' . $question_no++] = $cell;
            } else {
                $candidate[$value] = $cell;
            }

            /* Columns following "Pledge" are the numbered questions */
            if ($value === "Pledge") {
                $is_question = true;
            }
        }
        $candidate_data[] = $candidate;
    }
    fclose($handle);

    return $candidate_data;
}
/**
 * Fuzzy-match each candidate's typed LGA and Ward against a known LGA list.
 *
 * LGA matching: the typed LGA (lower-cased) and each LGA slug are split on
 * every non a-z character; each pair of tokens is compared with
 * similar_text(). A pair scoring above 70% adds its percentage to the LGA's
 * total, any other pair subtracts 1. The LGA with the highest total wins, and
 * only a total strictly greater than 0 counts as a match.
 *
 * Ward matching: within the matched LGA, the ward name with the highest
 * similar_text() percentage against the typed Ward wins; on a tie the later
 * ward in the list is taken.
 *
 * Results are written to each candidate as 'match_lga' (the LGA slug) and
 * 'match_ward' (the ward name). Both are null when no LGA matched, and
 * 'match_ward' is null when the matched LGA lists no wards.
 *
 * @param array $candidate_data Candidate records, modified in place.
 * @param array $lga_list       LGA records, each with a 'slug' string and a
 *                              'wardNames' list.
 * @return void
 */
function match_lga(&$candidate_data, $lga_list) {
    foreach ($candidate_data as &$candidate) {
        /* Reset per candidate so a failed match never inherits the previous
           candidate's result */
        $match_lga = null;
        $match_ward = null;

        $typed_lga = isset($candidate['LGA']) ? (string) $candidate['LGA'] : '';
        $typed_ward = isset($candidate['Ward']) ? (string) $candidate['Ward'] : '';

        /* Match user typed LGA/Ward to our database */
        $aa = preg_split("/[^a-z]/", strtolower($typed_lga));
        $max_score = 0;
        foreach ($lga_list as $lga) {
            $bb = preg_split("/[^a-z]/", $lga['slug']);
            $score_sum = 0;
            foreach ($aa as $a) {
                foreach ($bb as $b) {
                    similar_text($a, $b, $score);
                    if ($score > 70) $score_sum += $score;
                    else $score_sum -= 1;
                }
            }
            if ($score_sum > $max_score) {
                $max_score = $score_sum;
                $match_lga = $lga;
            }
        }

        if ($match_lga !== null && isset($match_lga['wardNames']) && is_array($match_lga['wardNames'])) {
            $max_score = 0;
            foreach ($match_lga['wardNames'] as $ward) {
                similar_text(strtolower($ward), strtolower($typed_ward), $score);
                /* >= so that some ward is always chosen, even at 0% */
                if ($score >= $max_score) {
                    $max_score = $score;
                    $match_ward = $ward;
                }
            }
        }

        $candidate['match_lga'] = ($match_lga !== null) ? $match_lga['slug'] : null;
        $candidate['match_ward'] = $match_ward;
    }
    /* Break the reference left behind by the by-reference foreach */
    unset($candidate);
}
/**
 * Remove earlier entries whose name closely matches a later entry's name.
 *
 * Each candidate's 'Name' is compared case-insensitively with the names of
 * all candidates before it using similar_text(). Any earlier candidate
 * scoring above 90% is dropped, so the most recent entry for a name is the
 * one kept. Surviving entries keep their original array keys.
 *
 * @param array $candidate_data Candidate records, modified in place.
 * @return void
 */
function remove_duplicates(&$candidate_data) {
    $seen = [];
    $stale_keys = [];

    foreach ($candidate_data as $index => $entry) {
        /* Flag every earlier entry whose name is a near match of this one */
        foreach ($seen as $earlier_index => $earlier_name) {
            similar_text(strtolower($earlier_name), strtolower($entry['Name']), $percent);
            if ($percent > 90) {
                /* Keyed by index so each stale entry is recorded only once */
                $stale_keys[$earlier_index] = true;
            }
        }
        $seen[$index] = $entry['Name'];
    }

    foreach ($stale_keys as $stale_index => $flag) {
        unset($candidate_data[$stale_index]);
    }
}