package pt.utl.ist.scripts.runOnce.student; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import net.sourceforge.fenixedu.domain.Person; import net.sourceforge.fenixedu.domain.contacts.EmailAddress; import net.sourceforge.fenixedu.domain.contacts.MobilePhone; import net.sourceforge.fenixedu.domain.contacts.Phone; import net.sourceforge.fenixedu.domain.organizationalStructure.Party; import net.sourceforge.fenixedu.domain.person.IdDocument; import net.sourceforge.fenixedu.domain.student.Registration; import pt.ist.fenixframework.FenixFramework; import pt.utl.ist.fenix.tools.util.excel.Spreadsheet; import pt.utl.ist.fenix.tools.util.excel.Spreadsheet.Row; import pt.utl.ist.scripts.commons.AtomicScript; public class DuplicatedStudentsSeeker extends AtomicScript { Map> matchesByIRS; Map> matchesByEmail; Map> matchesByPhone; Map> matchesByCell; Map> matchesByName; Map> matchesByBday; Map> matchesByDadName; Map> matchesByMomName; Map> matchesByAddress; Map matchCounter; @Override protected void run() throws Exception { harvestData(); analyseData(); reportData(); } private void harvestData() { matchesByIRS = new HashMap>(); matchesByEmail = new HashMap>(); matchesByPhone = new HashMap>(); matchesByCell = new HashMap>(); matchesByName = new HashMap>(); matchesByBday = new HashMap>(); matchesByDadName = new HashMap>(); matchesByMomName = new HashMap>(); matchesByAddress = new HashMap>(); matchCounter = new HashMap(); //List limitedSet = peopleSubset(); //for(Party party : limitedSet) { for (Party party : rootDomainObject.getPartysSet()) { if (party instanceof Person) { Person reference = ((Person) party); if (reference.getIstUsername() != null) { storeIRS(reference); storeEmail(reference); storePhone(reference); storeCell(reference); storeName(reference); storeBday(reference); storeDadName(reference); storeMomName(reference); storeAddress(reference); } } } } private void analyseData() { Set positives = new TreeSet(MATCH_COMPARATOR); for (Entry entry : matchCounter.entrySet()) { if (entry.getValue() > 3) { positives.add(entry.getKey()); } } for (Match match : positives) { System.out.println(match.getOriginal().getIstUsername() + "\t" + match.getClone().getIstUsername()); } System.out.println("\nTotal number of matches:\t" + positives.size() + "."); } private List peopleSubset() { List peeps = new ArrayList(); Person dude1 = Person.readPersonByIstUsername("ist145115"); Person dude2 = Person.readPersonByIstUsername("ist162288"); peeps.add(dude1); peeps.add(dude2); return peeps; } private void storeIRS(Person reference) { if (reference.getSocialSecurityNumber() == null) { return; } if (reference.getSocialSecurityNumber().isEmpty()) { return; } if (reference.getSocialSecurityNumber().trim().isEmpty()) { return; } if (matchesByIRS.containsKey(reference.getSocialSecurityNumber())) { updateCounter(reference, matchesByIRS.get(reference.getSocialSecurityNumber())); matchesByIRS.get(reference.getSocialSecurityNumber()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByIRS.put(reference.getSocialSecurityNumber(), matches); } } private void storeEmail(Person reference) { for (EmailAddress email : reference.getEmailAddresses()) { if (email.getPresentationValue() == null) { continue; } if (email.getPresentationValue().isEmpty()) { continue; } if (email.getPresentationValue().trim().isEmpty()) { continue; } if (matchesByEmail.containsKey(email.getPresentationValue())) { updateCounter(reference, matchesByEmail.get(email.getPresentationValue())); matchesByEmail.get(email.getPresentationValue()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByEmail.put(email.getPresentationValue(), matches); } } } private void storePhone(Person reference) { for (Phone phone : reference.getPhones()) { if (phone.getNumber() == null) { continue; } if (phone.getNumber().isEmpty()) { continue; } if (phone.getNumber().trim().isEmpty()) { continue; } if (matchesByPhone.containsKey(phone.getNumber())) { updateCounter(reference, matchesByPhone.get(phone.getNumber())); matchesByPhone.get(phone.getNumber()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByPhone.put(phone.getNumber(), matches); } } } private void storeCell(Person reference) { for (MobilePhone cell : reference.getMobilePhones()) { if (cell.getNumber() == null) { continue; } if (cell.getNumber().isEmpty()) { continue; } if (cell.getNumber().trim().isEmpty()) { continue; } if (matchesByCell.containsKey(cell.getNumber())) { updateCounter(reference, matchesByCell.get(cell.getNumber())); matchesByCell.get(cell.getNumber()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByCell.put(cell.getNumber(), matches); } } } private void storeName(Person reference) { if (reference.getName() == null) { return; } if (reference.getName().isEmpty()) { return; } if (reference.getName().trim().isEmpty()) { return; } if (matchesByName.containsKey(reference.getName())) { updateCounter(reference, matchesByName.get(reference.getName())); matchesByName.get(reference.getName()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByName.put(reference.getName(), matches); } } private void storeBday(Person reference) { if (reference.getDateOfBirthYearMonthDay() == null) { return; } if (reference.getDateOfBirthYearMonthDay().toString("ddMMyyyy") == null) { return; } if (reference.getDateOfBirthYearMonthDay().toString("ddMMyyyy").isEmpty()) { return; } if (reference.getDateOfBirthYearMonthDay().toString("ddMMyyyy").trim().isEmpty()) { return; } if (matchesByBday.containsKey(reference.getDateOfBirthYearMonthDay().toString("ddMMyyyy"))) { updateCounter(reference, matchesByBday.get(reference.getDateOfBirthYearMonthDay().toString("ddMMyyyy"))); matchesByBday.get(reference.getDateOfBirthYearMonthDay().toString("ddMMyyyy")).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByBday.put(reference.getDateOfBirthYearMonthDay().toString("ddMMyyyy"), matches); } } private void storeDadName(Person reference) { if (reference.getNameOfFather() == null) { return; } if (reference.getNameOfFather().isEmpty()) { return; } if (reference.getNameOfFather().trim().isEmpty()) { return; } if (matchesByDadName.containsKey(reference.getNameOfFather())) { updateCounter(reference, matchesByDadName.get(reference.getNameOfFather())); matchesByDadName.get(reference.getNameOfFather()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByDadName.put(reference.getNameOfFather(), matches); } } private void storeMomName(Person reference) { if (reference.getNameOfMother() == null) { return; } if (reference.getNameOfMother().isEmpty()) { return; } if (reference.getNameOfMother().trim().isEmpty()) { return; } if (matchesByMomName.containsKey(reference.getNameOfMother())) { updateCounter(reference, matchesByMomName.get(reference.getNameOfMother())); matchesByMomName.get(reference.getNameOfMother()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByMomName.put(reference.getNameOfMother(), matches); } } private void storeAddress(Person reference) { if (reference.getAddress() == null) { return; } if (reference.getAddress().isEmpty()) { return; } if (reference.getAddress().trim().isEmpty()) { return; } if (reference.getAddress().split(" ").length < 3) { return; } if (matchesByAddress.containsKey(reference.getAddress())) { updateCounter(reference, matchesByAddress.get(reference.getAddress())); matchesByAddress.get(reference.getAddress()).add(reference); } else { List matches = new ArrayList(); matches.add(reference); matchesByAddress.put(reference.getAddress(), matches); } } private void updateCounter(Person clone, List people) { if (people.isEmpty()) { return; } for (Person reference : people) { Match match = new Match(reference, clone); if (matchCounter.containsKey(match)) { matchCounter.put(match, matchCounter.get(match) + 1); } else { matchCounter.put(match, 1); } } } private void reportData() throws IOException { int likelihood = 4; Spreadsheet spreadsheet = new Spreadsheet("Duplicacao Pessoas - Positivos(Grau" + likelihood + ").xls"); File spreadsheetFile = new File("Duplicacao Pessoas - Positivos(Grau" + likelihood + ").xls"); spreadsheet.addRow(); spreadsheet.addRow(); spreadsheet.addRow(); Set positives = new TreeSet(MATCH_COMPARATOR); for (Entry entry : matchCounter.entrySet()) { if (entry.getValue() >= likelihood) { positives.add(entry.getKey()); } } int matchCnt = 0; for (Match match : positives) { //Title Row row = spreadsheet.addRow(); row.setCell("Caso " + ++matchCnt); spreadsheet.addRow(); //Name row = spreadsheet.addRow(); row.setCell("Nome"); row.setCell(match.getOriginal().getName()); row.setCell(match.getClone().getName()); //ISTid row = spreadsheet.addRow(); row.setCell("IST-Id"); row.setCell(match.getOriginal().getIstUsername()); row.setCell(match.getClone().getIstUsername()); //IRS row = spreadsheet.addRow(); row.setCell("Contribuinte"); row.setCell(match.getOriginal().getSocialSecurityNumber()); row.setCell(match.getClone().getSocialSecurityNumber()); //ID row = spreadsheet.addRow(); row.setCell("Doc. Id."); StringBuilder strBld = new StringBuilder(); boolean looping = false; for (IdDocument id : match.getOriginal().getIdDocuments()) { if (looping) { strBld.append(','); } strBld.append(id.getValue()); looping = true; } row.setCell(strBld.toString()); strBld = new StringBuilder(); looping = false; for (IdDocument id : match.getClone().getIdDocuments()) { if (looping) { strBld.append(','); } strBld.append(id.getValue()); looping = true; } row.setCell(strBld.toString()); //DadName row = spreadsheet.addRow(); row.setCell("Nome Pai"); row.setCell(match.getOriginal().getNameOfFather()); row.setCell(match.getClone().getNameOfFather()); //MomName row = spreadsheet.addRow(); row.setCell("Nome Mae"); row.setCell(match.getOriginal().getNameOfMother()); row.setCell(match.getClone().getNameOfMother()); //Employee row = spreadsheet.addRow(); row.setCell("Funcionario?"); row.setCell(match.getOriginal().getEmployee() != null ? "SIM" : "Nao"); row.setCell(match.getClone().getEmployee() != null ? "SIM" : "Nao"); //Teacher row = spreadsheet.addRow(); row.setCell("Docente?"); row.setCell(match.getOriginal().getTeacher() != null ? "SIM" : "Nao"); row.setCell(match.getClone().getTeacher() != null ? "SIM" : "Nao"); //Student row = spreadsheet.addRow(); row.setCell("Aluno?"); row.setCell(match.getOriginal().getStudent() != null ? "SIM" : "Nao"); row.setCell(match.getClone().getStudent() != null ? "SIM" : "Nao"); //StudenProps row = spreadsheet.addRow(); row.setCell("Dados Aluno"); if (match.getOriginal().getStudent() != null) { strBld = new StringBuilder(); looping = false; for (Registration reg : match.getOriginal().getStudent().getRegistrations()) { if (looping) { strBld.append(','); } strBld.append(reg.getDegreeCurricularPlanName() + "(" + reg.getNumber() + ")"); } row.setCell(strBld.toString()); } if (match.getClone().getStudent() != null) { strBld = new StringBuilder(); looping = false; for (Registration reg : match.getClone().getStudent().getRegistrations()) { if (looping) { strBld.append(','); } strBld.append(reg.getDegreeCurricularPlanName() + "(" + reg.getNumber() + ")"); } row.setCell(strBld.toString()); } spreadsheet.addRow(); spreadsheet.addRow(); spreadsheet.addRow(); } spreadsheet.exportToXLSSheet(spreadsheetFile); } public static void main(String[] args) { process(new DuplicatedStudentsSeeker()); System.exit(0); } private class Match { private final String original; private final String clone; Match(Person original, Person clone) { this.original = original.getExternalId(); this.clone = clone.getExternalId(); } public Person getOriginal() { return FenixFramework.getDomainObject(original); } public Person getClone() { return FenixFramework.getDomainObject(clone); } @Override public boolean equals(Object object) { if (object instanceof Match) { Match match = ((Match) object); if (getOriginal().getIstUsername().equals(match.getOriginal().getIstUsername()) && getClone().getIstUsername().equals(match.getClone().getIstUsername())) { return true; } if (getOriginal().getIstUsername().equals(match.getClone().getIstUsername()) && getClone().getIstUsername().equals(match.getOriginal().getIstUsername())) { return true; } } return false; } @Override public int hashCode() { return getOriginal().hashCode() ^ getClone().hashCode(); } } public static final Comparator MATCH_COMPARATOR = new Comparator() { @Override public int compare(Match o1, Match o2) { return o1.getOriginal().getIstUsername().compareTo(o2.getOriginal().getIstUsername()); } }; }