Commit 863053c9 authored by mihaidascalu

#16 Community processing bug fixes and cleaning of unused clustering methods

parent a6def7da
......@@ -6,7 +6,7 @@
<groupId>com.readerbench</groupId>
<artifactId>${artifactory.id}</artifactId>
<version>3.0.2</version>
<version>3.0.3</version>
<packaging>jar</packaging>
<properties>
......
......@@ -15,6 +15,7 @@
*/
package com.readerbench.data;
import com.readerbench.data.cscl.Utterance;
import com.readerbench.data.discourse.Keyword;
import com.readerbench.data.sentiment.SentimentEntity;
import com.readerbench.services.semanticModels.ISemanticModel;
......@@ -23,6 +24,7 @@ import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.*;
/**
* This abstract class is the base for all types of elements. It is extended
* later for all processing elements in the following hierarchical order:
......@@ -320,7 +322,6 @@ public abstract class AnalysisElement implements Serializable {
this.voiceDistribution = voiceDistribution;
}
/**
 * Returns the extended voice distribution of this element.
 * The stored array reference is handed out directly (no copy is made).
 *
 * @return the {@code extendedVoiceDistribution} field
 */
public double[] getExtendedVoiceDistribution() {
return extendedVoiceDistribution;
}
......@@ -380,10 +381,10 @@ public abstract class AnalysisElement implements Serializable {
/**
 * Computes a hash code from the element's index, text and container; for
 * {@link Utterance} instances the utterance time is mixed in as well.
 * <p>
 * The visible part of {@code equals} compares the text, the utterance time
 * (when both sides are utterances) and the container, so the fields hashed
 * here keep equal elements in the same hash bucket.
 * NOTE(review): the index comparison is assumed to live in the part of
 * {@code equals} that is not visible here - confirm.
 *
 * @return hash code consistent with {@code equals}
 */
@Override
public int hashCode() {
    if (this instanceof Utterance) {
        // Utterances are additionally distinguished by their timestamp.
        return Objects.hash(this.index, this.text, ((Utterance) this).getTime(), this.getContainer());
    }
    return Objects.hash(this.index, this.text, this.getContainer());
}
@Override
......@@ -404,8 +405,9 @@ public abstract class AnalysisElement implements Serializable {
if (!Objects.equals(this.text, other.text)) {
return false;
}
return true;
if (this instanceof Utterance && other instanceof Utterance && !Objects.equals(((Utterance) this).getTime(), ((Utterance) other).getTime())) {
return false;
}
return Objects.equals(this.getContainer(), other.getContainer());
}
}
......@@ -106,14 +106,8 @@ public class Block extends AnalysisElement implements Serializable {
}
/**
 * Appends a block to a document and extends the document's texts with it.
 * <p>
 * The block is always added at the end of the document's block list. The
 * block's raw text and processed text are concatenated to the document's
 * raw text and processed text respectively, each followed by a newline.
 *
 * @param d the document that receives the block
 * @param b the block to append
 */
public static void addBlock(AbstractDocument d, Block b) {
    d.getBlocks().add(b);
    d.setText(d.getText() + b.getText() + "\n");
    d.setProcessedText(d.getProcessedText() + b.getProcessedText() + "\n");
}
......@@ -317,14 +311,14 @@ public class Block extends AnalysisElement implements Serializable {
.flatMap(s -> s.getBiGrams().stream())
.collect(Collectors.toList());
}
/**
 * Collects the n-grams of every sentence in this block, in sentence order.
 *
 * @param n the n-gram size forwarded to each sentence
 * @return a list with the n-grams of all sentences concatenated
 */
@Override
public List<NGram> getNGrams(int n) {
    return sentences.stream()
            .map(sentence -> sentence.getNGrams(n))
            .flatMap(List::stream)
            .collect(Collectors.toList());
}
@Override
public String toString() {
String s = "";
......
......@@ -20,7 +20,6 @@ import java.util.ResourceBundle;
public enum CSCLIndices {
NO_CONTRIBUTION(true, true),
SCORE(true, true),
PERSONAL_KB(false, true),
SOCIAL_KB(true, true),
INTER_ANIMATION_DEGREE(false, true),
INDEGREE(true, true),
......
......@@ -101,6 +101,7 @@ public class Conversation extends AbstractDocument {
intenseCollabZonesVoice = new ArrayList<>();
annotatedCollabZones = new ArrayList<>();
}
/**
* @param path
* @param contents
......@@ -110,7 +111,6 @@ public class Conversation extends AbstractDocument {
*/
public Conversation(String path, AbstractDocumentTemplate contents, List<ISemanticModel> models, Lang lang, boolean usePOSTagging) {
this(path, models, lang);
this.setText(contents.getText());
setDocTmp(contents);
Parsing.getParser(lang).parseDoc(contents, this, usePOSTagging);
this.determineParticipantContributions();
......@@ -118,7 +118,6 @@ public class Conversation extends AbstractDocument {
public Conversation(AbstractDocumentTemplate contents, List<ISemanticModel> models, Lang lang, boolean usePOSTagging) {
this(models, lang);
this.setText(contents.getText());
setDocTmp(contents);
Parsing.getParser(lang).parseDoc(contents, this, usePOSTagging);
this.determineParticipantContributions();
......@@ -315,6 +314,7 @@ public class Conversation extends AbstractDocument {
/**
* Load conversation
*
* @param conversation - conversation
* @param models - semantic models
* @param lang - language
......@@ -322,7 +322,7 @@ public class Conversation extends AbstractDocument {
* @return
*/
public Conversation loadConversation(com.readerbench.solr.entities.cscl.Conversation conversation,
List<ISemanticModel> models, Lang lang, boolean usePOSTagging) {
List<ISemanticModel> models, Lang lang, boolean usePOSTagging) {
Conversation c = null;
// determine contents
AbstractDocumentTemplate contents = new AbstractDocumentTemplate();
......@@ -360,9 +360,6 @@ public class Conversation extends AbstractDocument {
support.mergeAdjacentContributions(blocks);
contents.setBlocks(support.newBlocks);
c = new Conversation(contents, models, lang, usePOSTagging);
c.setDocumentTitle("reddit", models, lang, usePOSTagging);
} catch (Exception ex) {
Exceptions.printStackTrace(ex);
}
......@@ -371,6 +368,7 @@ public class Conversation extends AbstractDocument {
/**
* Transform long date to String date - "yyyy-MM-dd HH:mm:ss" format
*
* @param time - Long date
* @return - String date
*/
......@@ -378,7 +376,7 @@ public class Conversation extends AbstractDocument {
try {
Date date = new Date(time);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
return sdf.format(date);
return sdf.format(date);
} catch (Exception e) {
LOGGER.warn("Error transforming date..." + time);
//e.printStackTrace();
......@@ -440,6 +438,7 @@ public class Conversation extends AbstractDocument {
}
return distribution;
}
/**
* @param voice
* @param p
......@@ -540,7 +539,7 @@ public class Conversation extends AbstractDocument {
//initialization: create mapping between block IDs and initial index positions in array
initialMapping = new TreeMap<>();
for (int i = 0; i < blocks.size(); i++) {
if(blocks.get(i) != null && blocks.get(i).getId() != null) {
if (blocks.get(i) != null && blocks.get(i).getId() != null) {
initialMapping.put(blocks.get(i).getId(), i);
}
}
......@@ -557,7 +556,7 @@ public class Conversation extends AbstractDocument {
//check if an explicit ref exists; in that case, perform merge only if link is between crt and previous contribution
boolean explicitRefCriterion = true;
if (crt.getRefId()!= null && crt.getRefId() != 0 && (!crt.getRefId().equals(prev.getId()))) {
if (crt.getRefId() != null && crt.getRefId() != 0 && (!crt.getRefId().equals(prev.getId()))) {
explicitRefCriterion = false;
}
if (crt.getSpeaker().equals(prev.getSpeaker()) && diffMinutes <= 1 && explicitRefCriterion) {
......
......@@ -22,6 +22,7 @@ import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
public class Participant implements Comparable<Participant>, Serializable {
......@@ -35,13 +36,13 @@ public class Participant implements Comparable<Participant>, Serializable {
private double textualComplexityLevel;
private EnumMap<CSCLIndices, Double> indices;
private Map<Entry<CSCLIndices, CSCLCriteria>, Double> longitudinalIndices;
// added by valentin.sergiu.cioaca@gmail.com
private double rhythmicIndexSM; // Solomon Marcus study
private double freqMaxRhythmIndex;
private double rhythmicCoefficient;
private double chatEntropyForRegularity;
private ParticipantGroup participantGroup;
public Participant(String name, AbstractDocument d) {
......@@ -66,11 +67,10 @@ public class Participant implements Comparable<Participant>, Serializable {
this.resetIndices();
}
/*----------------------------------------------------------------------------*/
/**
 * Stores the rhythmic index of this participant in {@code rhythmicIndexSM}.
 *
 * @param rhythmicIndex the value to store
 */
public void setRhythmicIndex(double rhythmicIndex) {
this.rhythmicIndexSM = rhythmicIndex;
}
/**
 * Stores the rhythmic coefficient of this participant.
 *
 * @param rhythmicCoefficient the value to store
 */
public void setRhythmicCoefficient(double rhythmicCoefficient) {
this.rhythmicCoefficient = rhythmicCoefficient;
}
......@@ -78,37 +78,31 @@ public class Participant implements Comparable<Participant>, Serializable {
/**
 * Stores the chat entropy of this participant in
 * {@code chatEntropyForRegularity}.
 *
 * @param chatEntropy the value to store
 */
public void setChatEntropy(double chatEntropy) {
this.chatEntropyForRegularity = chatEntropy;
}
/**
 * Stores the value of {@code freqMaxRhythmIndex}.
 *
 * @param freqMaxId the value to store
 */
public void setFreqMaxRhythmIndex(double freqMaxId) {
this.freqMaxRhythmIndex = freqMaxId;
}
/**
 * @return the rhythmic index held in {@code rhythmicIndexSM}
 */
public double getRhythmicIndex() {
return this.rhythmicIndexSM;
}
/**
 * @return the rhythmic coefficient of this participant
 */
public double getRhythmicCoefficient() {
return this.rhythmicCoefficient;
}
/**
 * @return the chat entropy held in {@code chatEntropyForRegularity}
 */
public double getChatEntropy() {
return this.chatEntropyForRegularity;
}
/**
 * @return the value of {@code freqMaxRhythmIndex}
 */
public double getFreqMaxRhythmIndex() {
return this.freqMaxRhythmIndex;
}
/*----------------------------------------------------------------------------*/
/**
 * @return the participant's name; {@code equals}, {@code hashCode} and
 *         {@code compareTo} of this class are all based on it
 */
public String getName() {
return name;
}
// public void setName(String name) {
// this.name = name;
// }
/**
 * @return the participant's alias
 */
public String getAlias() {
return alias;
}
......@@ -187,6 +181,13 @@ public class Participant implements Comparable<Participant>, Serializable {
return this.getName().equals(p.getName());
}
/**
 * Hashes a participant by name only, matching {@code equals}, which
 * compares participants by name.
 *
 * @return {@code 53 * 7} plus the null-safe hash of the name
 */
@Override
public int hashCode() {
    final int seed = 7;
    final int multiplier = 53;
    return multiplier * seed + Objects.hashCode(this.name);
}
@Override
public int compareTo(Participant o) {
return this.getName().compareTo(o.getName());
......
/*************************************************************************
* ADOBE CONFIDENTIAL
* ___________________
/*
* Copyright 2018 ReaderBench.
*
* Copyright 2016 Adobe Systems Incorporated
* All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by all applicable intellectual property
* laws, including trade secret and copyright laws.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.readerbench.data.cscl;
public enum ParticipantGroup {
......@@ -24,7 +21,7 @@ public enum ParticipantGroup {
private final int clusterNo;
/**
 * Creates a group constant bound to the given cluster number.
 *
 * @param clusterNo the cluster number stored for this group
 */
private ParticipantGroup(int clusterNo) {
    this.clusterNo = clusterNo;
}
......
/*************************************************************************
* ADOBE CONFIDENTIAL
* ___________________
/*
* Copyright 2018 ReaderBench.
*
* Copyright 2016 Adobe Systems Incorporated
* All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by all applicable intellectual property
* laws, including trade secret and copyright laws.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.readerbench.data.cscl;
import java.util.Arrays;
......@@ -87,19 +84,32 @@ public class ParticipantNormalized {
return name + "," + indegree + "," + outdegree;
}
/**
 * Compares this normalized participant with another object.
 * <p>
 * Two instances are equal when their eccentricity, indegree and outdegree
 * are equal according to {@link Double#compare(double, double)}, their
 * names are equal and their vectors have equal contents. The checks run in
 * that order and stop at the first mismatch.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is a {@code ParticipantNormalized}
 *         with the same field values, {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (!(o instanceof ParticipantNormalized)) {
        return false;
    }
    ParticipantNormalized that = (ParticipantNormalized) o;
    return Double.compare(that.eccentricity, eccentricity) == 0
            && Double.compare(that.indegree, indegree) == 0
            && Double.compare(that.outdegree, outdegree) == 0
            && name.equals(that.name)
            && Arrays.equals(vector, that.vector);
}
......@@ -118,4 +128,3 @@ public class ParticipantNormalized {
return result;
}
}
......@@ -68,7 +68,6 @@ public class Document extends AbstractDocument implements Comparable<Document> {
public Document(String path, AbstractDocumentTemplate docTmp, List<ISemanticModel> models, Lang lang, boolean usePOSTagging) {
this(path, models, lang);
this.setText(docTmp.getText());
setDocTmp(docTmp);
Parsing.getParser(lang).parseDoc(docTmp, this, usePOSTagging);
}
......
......@@ -16,13 +16,14 @@
package com.readerbench.services.complexity.readability;
import com.readerbench.data.AbstractDocument;
import com.readerbench.services.complexity.ComplexityIndices;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
/**
*
* @author Stefan Ruseti
*/
public class ReadabilityDaleChall extends ReadabilityIndex{
public class ReadabilityDaleChall extends ReadabilityIndex {
public ReadabilityDaleChall() {
super(ComplexityIndicesEnum.READABILITY_DALE_CHALL);
......@@ -30,7 +31,10 @@ public class ReadabilityDaleChall extends ReadabilityIndex{
/**
 * Computes the Dale-Chall readability value for a document.
 *
 * @param d the document to evaluate
 * @return the result of {@code computeDaleChall(d)}, or
 *         {@code ComplexityIndices.IDENTITY} when the document text is
 *         {@code null} or empty
 */
@Override
public double compute(AbstractDocument d) {
    String text = d.getText();
    if (text != null && !text.isEmpty()) {
        return computeDaleChall(d);
    }
    // Nothing to measure on a missing or empty text.
    return ComplexityIndices.IDENTITY;
}
}
......@@ -6,13 +6,14 @@
package com.readerbench.services.complexity.readability;
import com.readerbench.data.AbstractDocument;
import com.readerbench.services.complexity.ComplexityIndices;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
/**
*
* @author Stefan
*/
public class ReadabilityFlesch extends ReadabilityIndex{
public class ReadabilityFlesch extends ReadabilityIndex {
public ReadabilityFlesch() {
super(ComplexityIndicesEnum.READABILITY_FLESCH);
......@@ -20,8 +21,11 @@ public class ReadabilityFlesch extends ReadabilityIndex{
/**
 * Computes the Flesch readability value from the document's raw text.
 *
 * @param d the document to evaluate
 * @return the result of {@code calcFlesch} on the Fathom statistics of the
 *         text, or {@code ComplexityIndices.IDENTITY} when the text is
 *         {@code null} or empty
 */
@Override
public double compute(AbstractDocument d) {
    String text = d.getText();
    // Guard first so that Fathom never sees a missing or empty text.
    if (text == null || text.isEmpty()) {
        return ComplexityIndices.IDENTITY;
    }
    Fathom.Stats stats = Fathom.analyze(text);
    return calcFlesch(stats);
}
}
......@@ -16,13 +16,14 @@
package com.readerbench.services.complexity.readability;
import com.readerbench.data.AbstractDocument;
import com.readerbench.services.complexity.ComplexityIndices;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
/**
*
* @author Stefan Ruseti
*/
public class ReadabilityFog extends ReadabilityIndex{
public class ReadabilityFog extends ReadabilityIndex {
public ReadabilityFog() {
super(ComplexityIndicesEnum.READABILITY_FOG);
......@@ -30,8 +31,11 @@ public class ReadabilityFog extends ReadabilityIndex{
/**
 * Computes the Fog readability value from the document's raw text.
 *
 * @param d the document to evaluate
 * @return the result of {@code calcFog} on the Fathom statistics of the
 *         text, or {@code ComplexityIndices.IDENTITY} when the text is
 *         {@code null} or empty
 */
@Override
public double compute(AbstractDocument d) {
    String text = d.getText();
    // Guard first so that Fathom never sees a missing or empty text.
    if (text == null || text.isEmpty()) {
        return ComplexityIndices.IDENTITY;
    }
    Fathom.Stats stats = Fathom.analyze(text);
    return calcFog(stats);
}
}
\ No newline at end of file
}
......@@ -8,6 +8,7 @@ package com.readerbench.services.complexity.readability;
import com.readerbench.data.AbstractDocument;
import edu.stanford.nlp.util.Pair;
import com.readerbench.services.complexity.ComplexityIndex;
import com.readerbench.services.complexity.ComplexityIndices;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
import com.readerbench.services.nlp.listOfWords.ClassesOfWords;
import com.readerbench.services.nlp.listOfWords.ListOfWords;
......
......@@ -16,13 +16,14 @@
package com.readerbench.services.complexity.readability;
import com.readerbench.data.AbstractDocument;
import com.readerbench.services.complexity.ComplexityIndices;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
/**
*
* @author Stefan Ruseti
*/
public class ReadabilityKincaid extends ReadabilityIndex{
public class ReadabilityKincaid extends ReadabilityIndex {
public ReadabilityKincaid() {
super(ComplexityIndicesEnum.READABILITY_KINCAID);
......@@ -30,8 +31,11 @@ public class ReadabilityKincaid extends ReadabilityIndex{
/**
 * Computes the Kincaid readability value from the document's raw text.
 *
 * @param d the document to evaluate
 * @return the result of {@code calcKincaid} on the Fathom statistics of
 *         the text, or {@code ComplexityIndices.IDENTITY} when the text is
 *         {@code null} or empty
 */
@Override
public double compute(AbstractDocument d) {
    String text = d.getText();
    // Guard first so that Fathom never sees a missing or empty text.
    if (text == null || text.isEmpty()) {
        return ComplexityIndices.IDENTITY;
    }
    Fathom.Stats stats = Fathom.analyze(text);
    return calcKincaid(stats);
}
}
......@@ -138,8 +138,6 @@ public class ParticipantEvaluation {
Utterance u = (Utterance) b;
u.getParticipant().getIndices().put(CSCLIndices.SCORE,
u.getParticipant().getIndices().get(CSCLIndices.SCORE) + b.getScore());
u.getParticipant().getIndices().put(CSCLIndices.PERSONAL_KB,
u.getParticipant().getIndices().get(CSCLIndices.PERSONAL_KB) + u.getPersonalKB());
u.getParticipant().getIndices().put(CSCLIndices.SOCIAL_KB,
u.getParticipant().getIndices().get(CSCLIndices.SOCIAL_KB) + u.getSocialKB());
u.getParticipant().getIndices().put(CSCLIndices.NO_CONTRIBUTION,
......@@ -173,7 +171,7 @@ public class ParticipantEvaluation {
}
public static void performSNA(List<Participant> participants, double[][] participantContributions,
boolean needsAnonymization, String exportPath) {
boolean needsAnonymization, String exportPath) {
for (int index1 = 0; index1 < participants.size(); index1++) {
for (int index2 = 0; index2 < participants.size(); index2++) {
if (index1 != index2) {
......@@ -183,8 +181,7 @@ public class ParticipantEvaluation {
participants.get(index2).getIndices().put(CSCLIndices.INDEGREE,
participants.get(index2).getIndices().get(CSCLIndices.INDEGREE)
+ participantContributions[index1][index2]);
}
else {
} else {
participants.get(index1).getIndices().put(CSCLIndices.OUTDEGREE,
participants.get(index1).getIndices().get(CSCLIndices.OUTDEGREE)
+ participantContributions[index1][index1]);
......@@ -275,10 +272,10 @@ public class ParticipantEvaluation {
}
}
}
public static void extractRhythmicIndex(Conversation c) {
Map<Participant, List<Integer>> rhythmicIndPerPart = new TreeMap<>();
if (c.getParticipants().size() > 0) {
for (Block b : c.getBlocks()) {
if (b != null) {
......@@ -297,10 +294,11 @@ public class ParticipantEvaluation {
}
}
}
for (Entry<Participant, List<Integer>> entry : rhythmicIndPerPart.entrySet()) {
if (entry.getValue().isEmpty())
if (entry.getValue().isEmpty()) {
continue;
}
int maxIndex = Collections.max(entry.getValue());
entry.getKey().getIndices().put(CSCLIndices.RHYTHMIC_INDEX, 1.0 * maxIndex);
entry.getKey().getIndices().put(CSCLIndices.FREQ_MAX_INDEX, 1.0 * Collections.frequency(entry.getValue(),
......@@ -323,19 +321,22 @@ public class ParticipantEvaluation {
for (Sentence s : u.getSentences()) {
List<Word> unit = s.getAllWords();
List<Integer> repr = RhythmTool.getNumericalRepresentation(unit);
if (repr.isEmpty())
if (repr.isEmpty()) {
continue;
int NT = (repr.get(0) == 0) ? repr.size()-1 : repr.size();
}
int NT = (repr.get(0) == 0) ? repr.size() - 1 : repr.size();
int NA = repr.stream().mapToInt(Integer::intValue).sum();
Map<Integer, Integer> nrSylls = cntSyllables.get(p);
for (Integer nr : repr) {
if (nr == 0) continue;
if (nr == 0) {
continue;
}
nrSylls.put(nr,
nrSylls.containsKey(nr) ? nrSylls.get(nr)+1 : 1);
nrSylls.containsKey(nr) ? nrSylls.get(nr) + 1 : 1);
}
int devs = RhythmTool.calcDeviations(repr);
deviations.put(p,
deviations.containsKey(p) ? deviations.get(p)+1 : devs);
deviations.containsKey(p) ? deviations.get(p) + 1 : devs);
}
}
}
......@@ -348,12 +349,12 @@ public class ParticipantEvaluation {
double syllFreq = 1.0 * entry.getValue() / totalNumber;
}
int dominantInd = RhythmTool.getDominantIndex(nrSylls.values().stream()
.collect(Collectors.toList()));
.collect(Collectors.toList()));
int keyOfMaxVal = nrSylls.keySet().stream()
.collect(Collectors.toList()).get(dominantInd);
.collect(Collectors.toList()).get(dominantInd);
int sum = nrSylls.get(keyOfMaxVal);
sum += (nrSylls.containsKey(keyOfMaxVal-1)) ? nrSylls.get(keyOfMaxVal-1) : 0;
sum += (nrSylls.containsKey(keyOfMaxVal+1)) ? nrSylls.get(keyOfMaxVal+1) : 0;
sum += (nrSylls.containsKey(keyOfMaxVal - 1)) ? nrSylls.get(keyOfMaxVal - 1) : 0;
sum += (nrSylls.containsKey(keyOfMaxVal + 1)) ? nrSylls.get(keyOfMaxVal + 1) : 0;
double coeff = 1.0 * (deviations.get(p) + totalNumber - sum) / totalNumber;
p.getIndices().put(CSCLIndices.RHYTHMIC_COEFFICIENT, coeff);
}
......@@ -390,23 +391,23 @@ public class ParticipantEvaluation {
int index, size;
long diff;
size = (int)Math.ceil((double)chatTime / frameTime);
size = (int) Math.ceil((double) chatTime / frameTime);
for (Entry<Participant, List<Date>> entry : timestamps.entrySet()) {
List<Double> arr = new ArrayList<>(Collections.nCopies(size, 0.0));
for (Date d : entry.getValue()) {
diff = (d.getTime() - chatStartTime.getTime()) / 1000;
index = (int)Math.floor((double)diff / frameTime);
index = (int) Math.floor((double) diff / frameTime);
arr.set(index, arr.get(index) + 1);
}
noInterventions.put(entry.getKey(), arr);
}
for (Entry<Participant, List<Double>> entry : noInterventions.entrySet()) {
double value = CSCLCriteria.getValue(CSCLCriteria.PEAK_CHAT_FRAME,
double value = CSCLCriteria.getValue(CSCLCriteria.PEAK_CHAT_FRAME,
entry.getValue().stream().mapToDouble(d -> d).toArray());
entry.getKey().getIndices().put(CSCLIndices.PERSONAL_REGULARITY_ENTROPY, value);
// System.out.println(entry.getKey().getName() + " " + value);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment