Start generating image download script.

This commit is contained in:
Kim Taylor
2024-09-22 22:57:35 +10:00
parent 219b242503
commit f9c151bfae
5 changed files with 111 additions and 63 deletions

View File

@@ -1,20 +0,0 @@
<?php
/*
 * Dump every candidate parsed from the generic survey CSV as a single JSON
 * document on stdout.
 *
 * Required option: --generic-csv <path>
 */
require_once("parse_generic_csv.php");

$cli_args = getopt("", ["generic-csv:"]);

/* The CSV path is mandatory: report the problem and stop when it is absent. */
if (!isset($cli_args['generic-csv'])) {
    error_log("Error: Missing required option '--generic-csv'.");
    exit(1);
}
$csv_path = $cli_args['generic-csv'];

/* Parse, encode and print in one go; nothing else consumes the intermediates. */
print_r(json_encode(parse_generic_csv($csv_path)));
exit(0);

View File

@@ -35,6 +35,9 @@ foreach ($config_files as $config_file) {
$lga_list[] = $config; $lga_list[] = $config;
} }
/* Match user typed LGA/Ward to our database */
match_lga($candidate_data, $lga_list);
/* Calculate score for candidate */ /* Calculate score for candidate */
foreach ($candidate_data as &$candidate) { foreach ($candidate_data as &$candidate) {
$score = 0; $score = 0;
@@ -48,46 +51,6 @@ foreach ($candidate_data as &$candidate) {
$candidate['Score'] = $score; $candidate['Score'] = $score;
} }
foreach ($candidate_data as &$candidate) {
/* Match user typed LGA/Ward to our database */
$max_score = 0;
foreach ($lga_list as $lga) {
$aa = preg_split("/[^a-z]/", strtolower($candidate['LGA']));
$bb = preg_split("/[^a-z]/", $lga['slug']);
$score_sum = 0;
foreach ($aa as $a) {
foreach ($bb as $b) {
similar_text($a, $b, $score);
if ($score > 70) $score_sum += $score;
else $score_sum -= 10;
}
}
if ($score_sum > $max_score) {
$max_score = $score_sum;
$match_lga = $lga;
}
}
$max_score = 0;
foreach ($match_lga['wardNames'] as $ward) {
similar_text($ward, $candidate['Ward'], $score);
if ($score > $max_score) {
$max_score = $score;
$match_ward = $ward;
}
}
$candidate['match_lga'] = $match_lga['slug'];
$candidate['match_ward'] = $match_ward;
}
/* Get picture */
foreach ($candidate_data as &$candidate) {
$candidate['match_picture'] = "";
}
$header = ["Ward", "Candidate Name", "Rating", "Picture"]; $header = ["Ward", "Candidate Name", "Rating", "Picture"];
/* Generate candidates-generic.csv */ /* Generate candidates-generic.csv */
@@ -115,7 +78,7 @@ foreach ($lga_list as $lga) {
$candidate['match_ward'], $candidate['match_ward'],
$candidate['Name'], $candidate['Name'],
$candidate['Score'], $candidate['Score'],
$candidate['match_picture'], $candidate['Photo'],
]; ];
if (fputcsv($handle, $fields) === FALSE) { if (fputcsv($handle, $fields) === FALSE) {

View File

@@ -0,0 +1,54 @@
<?php
/*
 * Print a JSON map describing the candidate photos that need downloading.
 *
 * Required options:
 *   --generic-csv  <path>              survey CSV handed to parse_generic_csv()
 *   --config-files "<a> <b> ..."       space separated list of LGA config JSON files
 *
 * Output (stdout): a JSON map keyed by $candidate['Photo']; every value holds
 *   'url'       - the candidate's 'photo_url' value
 *   'match_lga' - the 'match_lga' value set by match_lga()
 *
 * Exits with status 1 when an option is missing or a config file cannot be
 * read or decoded.
 */
require_once("parse_generic_csv.php");

$options = getopt("", ["generic-csv:", "config-files:"]);

if (isset($options['generic-csv'])) {
    $generic_csv = $options['generic-csv'];
} else {
    error_log("Error: Missing required option '--generic-csv'.");
    exit(1);
}

if (isset($options['config-files'])) {
    $config_files = $options['config-files'];
} else {
    error_log("Error: Missing required option '--config-files'.");
    exit(1);
}

/* Split on spaces, dropping the empty entries that doubled or trailing
 * spaces would otherwise turn into bogus empty paths. */
$config_files = preg_split("/ +/", $config_files, -1, PREG_SPLIT_NO_EMPTY);

$candidate_data = parse_generic_csv($generic_csv);

$lga_list = [];

/* Generate dictionary of LGAs and Wards */
foreach ($config_files as $config_file) {
    $config_string = file_get_contents($config_file);
    if ($config_string === FALSE) {
        error_log("Error opening config file '$config_file'.");
        exit(1);
    }

    /* Refuse anything that does not decode to an array: continuing would
     * put an entry without 'slug'/'wardNames' into the LGA list. */
    $config = json_decode($config_string, true);
    if (!is_array($config)) {
        error_log("Error parsing config file '$config_file': " . json_last_error_msg());
        exit(1);
    }

    /* Remember which file each LGA definition came from. */
    $config['config-file'] = $config_file;
    $lga_list[] = $config;
}

/* Match user typed LGA/Ward to our database */
match_lga($candidate_data, $lga_list);

$image_map = [];
foreach ($candidate_data as $candidate) {
    /* 'photo_url' is absent when the CSV has no Photo column. */
    $photo_url = (string)($candidate['photo_url'] ?? '');
    if ($photo_url === '') {
        continue;
    }

    /* NOTE(review): 'Photo' is presumably the id extracted from the URL by
     * parse_generic_csv(); it is empty when the URL carried no "id=" part.
     * Such an entry cannot be keyed, so it is reported and skipped. */
    $photo_key = (string)($candidate['Photo'] ?? '');
    if ($photo_key === '') {
        error_log("Warning: cannot derive a photo id from '$photo_url', skipping.");
        continue;
    }

    /* Build a fresh entry each time rather than reusing one array. */
    $image_map[$photo_key] = [
        'url' => $photo_url,
        'match_lga' => $candidate['match_lga'],
    ];
}

$json_data = json_encode($image_map);
if ($json_data === FALSE) {
    error_log("Error encoding image map: " . json_last_error_msg());
    exit(1);
}
echo $json_data;
exit(0);

View File

@@ -23,6 +23,11 @@ function parse_generic_csv($generic_csv) {
if (strstr($data[$key], "I pledge")) $data[$key] = "Yes"; if (strstr($data[$key], "I pledge")) $data[$key] = "Yes";
else $data[$key] = "No"; else $data[$key] = "No";
} }
if ($value === "Photo") {
$candidate['photo_url'] = $data[$key];
$data[$key] = preg_filter("/.*id=/", "", $data[$key]);
}
if ($is_question) { if ($is_question) {
$candidate['q'.$question_no++] = $data[$key]; $candidate['q'.$question_no++] = $data[$key];
@@ -44,3 +49,40 @@ function parse_generic_csv($generic_csv) {
return $candidate_data; return $candidate_data;
} }
/**
 * Match every candidate's free-text LGA and Ward against the known LGAs.
 *
 * Two keys are added to each candidate, in place through the reference:
 *   'match_lga'  - 'slug' of the best scoring entry of $lga_list, or null when
 *                  no entry scores above zero;
 *   'match_ward' - the entry of that LGA's 'wardNames' most similar to the
 *                  candidate's 'Ward', or null when there is no LGA match or
 *                  no ward scores above zero.
 *
 * @param array $candidate_data List of candidates; 'LGA' and 'Ward' are read.
 * @param array $lga_list       List of LGA configs; 'slug' and 'wardNames' are read.
 */
function match_lga(&$candidate_data, $lga_list) {
    foreach ($candidate_data as &$candidate) {
        /* Reset per candidate so a miss never inherits the previous
         * candidate's match (and is never an undefined variable). */
        $match_lga = null;
        $match_ward = null;

        $typed_lga = strtolower((string)($candidate['LGA'] ?? ''));
        $typed_ward = (string)($candidate['Ward'] ?? '');

        /* Match user typed LGA/Ward to our database */
        /* The typed words do not depend on the LGA being tried: split once. */
        $aa = preg_split("/[^a-z]/", $typed_lga);
        $max_score = 0;
        foreach ($lga_list as $lga) {
            $bb = preg_split("/[^a-z]/", $lga['slug']);
            $score_sum = 0;
            /* Every typed word is compared with every slug word: close pairs
             * add their similarity percentage, all other pairs cost 10. */
            foreach ($aa as $a) {
                foreach ($bb as $b) {
                    similar_text($a, $b, $score);
                    if ($score > 70) $score_sum += $score;
                    else $score_sum -= 10;
                }
            }
            if ($score_sum > $max_score) {
                $max_score = $score_sum;
                $match_lga = $lga;
            }
        }

        /* Wards are only searched inside the matched LGA. */
        if ($match_lga !== null) {
            $max_score = 0;
            foreach (($match_lga['wardNames'] ?? []) as $ward) {
                similar_text($ward, $typed_ward, $score);
                if ($score > $max_score) {
                    $max_score = $score;
                    $match_ward = $ward;
                }
            }
        }

        $candidate['match_lga'] = ($match_lga === null) ? null : $match_lga['slug'];
        $candidate['match_ward'] = $match_ward;
    }
    /* Break the reference so later writes to $candidate cannot touch the
     * last element of $candidate_data. */
    unset($candidate);
}

View File

@@ -15,6 +15,15 @@ for folder in "$DATA_PATH"/*; do
fi fi
done done
#php csv-generic/csv-to-json.php --generic-csv $DATA_LOC/responses.csv
php csv-generic/gen-generic.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}" image_map=$(php csv-generic/gen-image-map.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}")
# Walk the JSON image map: jq prints one top-level key per iteration.
for key in $(jq -r 'keys[]' <<< $image_map) ; do
# Source URL stored for this key.
url=$(jq -r ".[\"$key\"][\"url\"]" <<< $image_map)
# match_lga value stored for this key; used below as the destination sub-folder.
lga=$(jq -r ".[\"$key\"][\"match_lga\"]" <<< $image_map)
# Destination path: <DATA_PATH>/<lga>/<key>.
dst=$DATA_PATH/$lga/$key
# Only prints the wget command; nothing is downloaded.
echo wget $url -O $dst
# Stops after the first key.
break
done
#php csv-generic/gen-generic.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}"