# -*- coding: utf-8 -*-
"""
Created on oct. 30, 2014, 23:52
Copyright François Durand 2014, 2015
fradurand@gmail.com
This file is part of SVVAMP.
SVVAMP is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SVVAMP is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SVVAMP. If not, see <http://www.gnu.org/licenses/>.
==============
The function preflib_to_preferences_utilities below is adapted from
PreflibUtils.py by Nicholas Mattei. We reproduce its license here.
File: PrefLibUtilities.py
Author: Nicholas Mattei (nicholas.mattei@nicta.com.au)
Date: April 4, 2013
November 6th, 2013
* Copyright (c) 2014, Nicholas Mattei and NICTA
* All rights reserved.
*
* Developed by: Nicholas Mattei
* NICTA
* http://www.nickmattei.net
* http://www.preflib.org
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NICTA nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY NICTA ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NICTA BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import numpy as np
import pandas as pd
from svvamp.Preferences.Population import \
preferences_ut_to_preferences_borda_ut
from svvamp.Preferences.Population import Population
[docs]class PopulationFromFile(Population):
_layout_name = 'From file'
def __init__(self, file_name, relative_noise=0., absolute_noise=0.):
"""Population from a file.
:param file_name: -- String. The name of the file.
:param relative_noise: -- Number.
:param absolute_noise: -- Number.
:return: A :class:`~svvamp.Population` object.
If the file name ends with '.t.csv' (t = transposed format): simple
table of utilities with candidates declared in the first column and
voters declared in the first row.
If the file name ends with '.csv' (but not '.t.csv'), candidates
must be declared in the first row and voters in the first column.
Otherwise, the file is considered as a PrefLib file. In that case,
since information is ordinal only,
``preferences_ut[v, c]`` is set to the Borda score
(with no vtb) minus ``(C - 1) / 2``. This way, utilities are between
``- (C - 1) / 2`` and ``(C - 1) / 2``.
To each ``preferences_ut[v, c]``, a random noise is added which
is drawn independently and uniformly in the interval
``[- relative_noise * amplitude, relative_noise * amplitude]``,
where ``amplitude`` is the difference between the lowest and the
highest utility.
Another random noise is added, which is drawn independently and
uniformly in the interval ``[-absolute_noise, absolute_noise]``.
"""
if file_name[-4:] == '.csv':
df = pd.read_csv(filepath_or_buffer=file_name, sep=';',
index_col=0)
if file_name[-6:-4] == '.t':
preferences_utilities = df.transpose().values
labels_candidates = df.index.values.astype(np.str)
else:
preferences_utilities = df.values
labels_candidates = df.columns.values.astype(np.str)
pop_temp = Population(preferences_utilities)
nb_victories_temp = np.sum(pop_temp.matrix_victories_ut_rel, 1)
scores_temp = (nb_victories_temp +
pop_temp.borda_score_c_ut / pop_temp.C /
pop_temp.V)
candidates_best_to_worst = np.argsort(- scores_temp,
kind='mergesort')
preferences_utilities = preferences_utilities[
:, candidates_best_to_worst]
labels_candidates = labels_candidates[candidates_best_to_worst]
else:
preferences_utilities, labels_candidates = (
preflib_to_preferences_utilities(file_name))
preferences_utilities = preferences_utilities.astype(np.float)
total_noise = absolute_noise
if relative_noise != 0:
amplitude = np.max(preferences_utilities) - np.min(
preferences_utilities)
total_noise += relative_noise * amplitude
if total_noise != 0:
preferences_utilities += total_noise * 2 * (
0.5 - np.random.rand(*preferences_utilities.shape))
log_creation = ['From file',
'File name', file_name,
'Relative noise', relative_noise,
'Absolute noise', absolute_noise]
super().__init__(preferences_ut=preferences_utilities,
log_creation=log_creation,
labels_candidates=labels_candidates)
# TODO: iterator and meta-iterator
@staticmethod
def iterator(culture_parameters, nb_populations):
for i in range(nb_populations):
yield PopulationFromFile(**culture_parameters)
# @staticmethod
# def meta_iterator(culture_parameters_list, nb_populations):
# for C, V, culture_parameters in itertools.product(
# C_list, V_list, culture_parameters_list):
# log_csv = ['Euclidean box',
# 'Box dimensions', culture_parameters['box_dimensions'],
# 'Number of dimensions',
# len(culture_parameters['box_dimensions'])]
# log_print = ('Euclidean box, V = ' + str(V) + ', C = ' + str(C) +
# ', box dimensions = ' +
# format(culture_parameters['box_dimensions']))
# yield log_csv, log_print, PopulationFromFile.iterator(
# C, V, culture_parameters, nb_populations)
def preflib_to_preferences_utilities(file_name):
# TODO: Is the copyright mention above enough? Ask Nicholas Mattei.
# PreflibUtils.py
input_file = open(file_name, 'r')
# Number of candidates
l = input_file.readline()
# Map of candidates
C = int(l.strip())
candidates_map = {}
labels_candidates = []
for c in range(C):
bits = input_file.readline().strip().split(",")
candidates_map[int(bits[0].strip()) - 1] = bits[1].strip()
labels_candidates.append(bits[1].strip())
# Now we have V, sum_of_vote_count, num_unique orders
bits = input_file.readline().strip().split(",")
V = int(bits[0].strip())
sum_votes = int(bits[1].strip())
unique_orders = int(bits[2].strip())
# Now, the ballots themselves
preferences_utilities = np.zeros((V, C))
start_index = 0
for i in range(unique_orders):
rec = input_file.readline().strip()
#need to parse the rec properly..
count = int(rec[:rec.index(",")])
bits = rec[rec.index(",")+1:].strip().split(",")
ballot_temp = np.full(C, - C)
if rec.find("{") == -1:
#its strict, just split on ,
for crank in range(len(bits)):
ballot_temp[int(bits[crank]) - 1] = - crank
else:
crank = 0
partial = False
for ccand in bits:
if ccand.find("{") != -1:
partial = True
t = ccand.replace("{","")
ballot_temp[int(t.strip()) - 1] = - crank
elif ccand.find("}") != -1:
partial = False
t = ccand.replace("}","")
ballot_temp[int(t.strip()) - 1] = - crank
crank += 1
else:
ballot_temp[int(ccand.strip()) - 1] = - crank
if partial == False:
crank += 1
preferences_utilities[
start_index:start_index + count, :] = \
preferences_ut_to_preferences_borda_ut(
ballot_temp[np.newaxis, :]) - (C - 1) / 2
start_index += count
input_file.close()
return preferences_utilities, labels_candidates
if __name__ == '__main__':
# pop = PopulationFromFile('ED-00001-00000001.soi')
pop = PopulationFromFile('example_ballots.t.csv',
absolute_noise=0.5)
# preferences_ut = pop.preferences_ut[:, 0:3]
# preferences_ut -= np.mean(preferences_ut, 1)[:, np.newaxis]
# pop_temp = Population(preferences_ut)
pop.demo()
pop.plot3(use_labels=True)
pop.plot4(use_labels=True)