package maito.datacollecting.dcxml;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;

import maito.datacollecting.Record;
import maito.util.Tools;

/**
 * DCXMLTransformWorker
 * 
 * A class that implements the functionality for DC XML and
 * oai_citeseer transformations.
 * 
 * @version 2.5
 * @author Väinö Ala-Härkönen
 * @author Tuomas Tanner
 * @author Reima Halmetoja
 */
public class DCXMLTransformWorker {

    /** Static utility class; it is not meant to be instantiated. */
    private DCXMLTransformWorker() {}

    /**
     * Transforms a Dublin Core / oai_citeseer record into the unified
     * internal record format.
     *
     * Every field name of the source record is inspected once; recognised
     * keys are copied (optionally canonized through the Tools class) to the
     * corresponding internal field, unrecognised keys are ignored. Null
     * values are skipped for every field.
     *
     * Dates are handled last: the values of dc:issued take precedence over
     * the values of dc:date, regardless of the order in which the two keys
     * are encountered.
     *
     * @param record
     * A record object that represents a record in
     * Dublin Core 1.1 Simple plus possible Qualified extensions
     * plus possible oai_citeseer extensions laid out in the specifications.
     * Keys of the fields must be in format dc:dcFieldName (or oai:oaiFieldName)
     * @param dataType
     * Type of the source: see constants in Tools-class. It is passed on to
     * the actor handling methods of Tools.
     * @return
     * Returns a new Record object, created with the ID of the source record,
     * containing the data transformed to the unified record format that
     * this program uses.
     */
    protected static Record transform(Record record, int dataType) {
        Record internalRecord = new Record(record.getID());

        // Raw date values; canonized only after every key has been seen,
        // because dc:issued must be able to override dc:date.
        Vector dateHolder = new Vector();
        boolean issuedFound = false;

        HashSet recordFields = record.getFieldNames();
        for (Iterator keys = recordFields.iterator(); keys.hasNext();) {
            String key = (String) keys.next();

            if (key.equals("dc:title")) {
                copyTitles(record, key, internalRecord);
            }
            else if (key.equals("dc:creator")) {
                copyCreators(record, key, internalRecord, dataType);
            }
            else if (key.equals("dc:subject")) {
                copyCanonized(record, key, internalRecord, "Aihe");
            }
            else if (key.equals("dc:description") || key.equals("dc:type")) {
                copyCanonized(record, key, internalRecord, "Hakusanat");
            }
            else if (key.equals("dc:publisher")) {
                copyActorRole(record, key, internalRecord, "Julkaisija", dataType);
            }
            else if (key.equals("dc:contributor")) {
                copyActorRole(record, key, internalRecord, "Avustaja", dataType);
            }
            else if (key.equals("dc:date")) {
                if (!issuedFound) { // Ignore if issued data already found
                    dateHolder = record.getField("dc:date");
                }
            }
            else if (key.equals("dc:format")) {
                copyCanonized(record, key, internalRecord, "Formaatti");
            }
            else if (key.equalsIgnoreCase("dc:identifier")) {
                // The key is matched case-insensitively, so the values are
                // read using the spelling that the record actually contains.
                copyIdentifiers(record, key, internalRecord);
            }
            else if (key.equals("dc:language")) {
                copyLanguages(record, key, internalRecord);
            }
            else if (key.equals("dc:relation")) {
                copyPlain(record, key, internalRecord, "Yhteys");
            }
            else if (key.equals("dc:bibliographicCitation")) {
                copyCanonized(record, key, internalRecord, "Viittaustunniste");
            }
            else if (key.equals("dc:issued")) {
                issuedFound = true; // So dc:date is not processed after this
                dateHolder = record.getField("dc:issued"); // Overwrites dc:date contents if already found
            }
            else if (key.equals("dc:references") || key.equals("oai_citeseer:relationReferences")) {
                copyPlain(record, key, internalRecord, "Viitattu");
            }
            else if (key.equals("dc:isReferencedBy") || key.equals("oai_citeseer:relationIsReferencedBy")) {
                copyPlain(record, key, internalRecord, "Viittaava");
            }
            else if (key.equals("dc:rightsHolder")) {
                copyActorRole(record, key, internalRecord, "Oikeuksienomistaja", dataType);
            }
            else if (key.equals("oai_citeseer:identifier")) {
                copyPlain(record, key, internalRecord, "SisäinenTunniste");
            }
            else if (key.equals("oai_citeseer:authorName")) {
                copyCiteseerAuthors(record, key, internalRecord);
            }
            else if (key.equals("oai_citeseer:authorAffiliation")) {
                copyCanonized(record, key, internalRecord, "OrganisaatioTekijä");
            }
        }

        // Finally add the collected date values in normalized form. The
        // holder is still the initial empty Vector when neither dc:date
        // nor dc:issued was present, so nothing is added in that case.
        copyDates(dateHolder, internalRecord);

        return internalRecord;
    }

    /** Copies every non-null value of sourceKey unchanged to targetField. */
    private static void copyPlain(Record source, String sourceKey, Record target, String targetField) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value != null) {
                target.setField(targetField, value);
            }
        }
    }

    /** Copies every non-null value of sourceKey to targetField after Tools.canonizeGeneric. */
    private static void copyCanonized(Record source, String sourceKey, Record target, String targetField) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value != null) {
                target.setField(targetField, Tools.canonizeGeneric(value));
            }
        }
    }

    /** Stores each non-null title both as such (Nimeke) and in canonized form (KanoNimeke). */
    private static void copyTitles(Record source, String sourceKey, Record target) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value != null) {
                target.setField("Nimeke", value);
                target.setField("KanoNimeke", Tools.canonizeTitle(value));
            }
        }
    }

    /**
     * Splits each non-null creator with Tools.splitActor. The first element
     * of the result is used as the prefix of the field name (prefix + "Tekijä")
     * and every following element is stored as a value of that field.
     */
    private static void copyCreators(Record source, String sourceKey, Record target, int dataType) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value != null) {
                String[] actorValues = Tools.splitActor(value, dataType);
                for (int i = 1; i < actorValues.length; i++) {
                    target.setField(actorValues[0] + "Tekijä", actorValues[i]);
                }
            }
        }
    }

    /**
     * Stores each non-null value unchanged into the field named
     * prefix + roleSuffix, where the prefix is the first element returned by
     * Tools.canonizeActor (e.g. HenkilöJulkaisija / OrganisaatioJulkaisija /
     * JokuJulkaisija for the suffix "Julkaisija").
     */
    private static void copyActorRole(Record source, String sourceKey, Record target,
            String roleSuffix, int dataType) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value != null) {
                String[] actorValues = Tools.canonizeActor(value, dataType);
                target.setField(actorValues[0] + roleSuffix, value);
            }
        }
    }

    /** Stores each non-null identifier under the field name given by Tools.categorizeIdentifier. */
    private static void copyIdentifiers(Record source, String sourceKey, Record target) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value != null) {
                target.setField(Tools.categorizeIdentifier(value), value);
            }
        }
    }

    /**
     * Stores the canonized form of each language value into Kieli. Null
     * values are skipped before canonization, like in the other fields, and
     * a value is saved only if Tools.canonizeLang returns a non-null result.
     */
    private static void copyLanguages(Record source, String sourceKey, Record target) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value == null) {
                continue;
            }
            String canonizedLang = Tools.canonizeLang(value);
            if (canonizedLang != null) { // Save only if in proper format
                target.setField("Kieli", canonizedLang);
            }
        }
    }

    /**
     * Splits each non-null oai_citeseer author name with Tools.splitPerson
     * and stores every element after the first one into HenkilöTekijä.
     * The heuristics of Tools.splitActor are not needed here, since the
     * value is known to be a person.
     */
    private static void copyCiteseerAuthors(Record source, String sourceKey, Record target) {
        for (Iterator values = source.getField(sourceKey).iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value != null) {
                String[] actorValues = Tools.splitPerson(value);
                for (int i = 1; i < actorValues.length; i++) {
                    target.setField("HenkilöTekijä", actorValues[i]);
                }
            }
        }
    }

    /**
     * Stores the canonized form of each date value into Julkaisupäivämäärä.
     * Null values are skipped, each value is canonized only once, and a
     * value is saved only if Tools.canonizeDate returns a non-null result.
     */
    private static void copyDates(Vector dateValues, Record target) {
        for (Iterator values = dateValues.iterator(); values.hasNext();) {
            String value = (String) values.next();
            if (value == null) {
                continue;
            }
            String canonizedDate = Tools.canonizeDate(value);
            if (canonizedDate != null) { // Save only if in proper format
                target.setField("Julkaisupäivämäärä", canonizedDate);
            }
        }
    }

}
