Commit 1f4d2820 authored by stefan

removed unnecessary lexical chain distances

parent 74ae53ee
@@ -6,7 +6,7 @@
<groupId>com.readerbench</groupId>
<artifactId>${artifactory.id}</artifactId>
<version>3.0.0</version>
<version>3.0.1</version>
<packaging>jar</packaging>
<properties>
......
@@ -209,10 +209,6 @@ public abstract class AbstractDocument extends AnalysisElement {
// System.out.println(chain);
// }
LOGGER.info("Compute word distances");
DisambiguisationGraphAndLexicalChains.computeWordDistances(this);
// System.out.println(LexicalCohesion.getDocumentCohesion(this));
// determine semantic chains / voices
LOGGER.info("Determine semantic chains / voices");
DialogismComputations.determineVoices(this);
......
@@ -128,15 +128,6 @@ public class Word extends AnalysisElement implements Comparable<Word>, Serializa
return w;
}
public double getDistanceInChain(Word word) {
if (!partOfSameLexicalChain(word)) {
return Double.MAX_VALUE;
} else {
LexicalChain chain = word.getLexicalChainLink().getLexicalChain();
// distance between this word's link and the other word's link in the shared chain
return chain.getDistance(this.getLexicalChainLink(), word.getLexicalChainLink());
}
}
public boolean isNoun() {
return POS.startsWith("NN");
}
......
@@ -23,138 +23,87 @@ import java.util.*;
import java.util.stream.Collectors;
/**
*
*
* @author Ioana Serban
* @author Mihai Dascalu
*/
public class LexicalChain implements Serializable {
private static final long serialVersionUID = -4724528858130546429L;
private final Set<LexicalChainLink> links = new HashSet<>();
private final HashMap<LexicalChainLink, HashMap<LexicalChainLink, Double>> distanceMap = new HashMap<>();
public boolean addLink(LexicalChainLink link) {
link.setLexicalChain(this);
return links.add(link);
}
public boolean containsWord(Word word) {
for (LexicalChainLink link : links) {
if (link.getWord() == word)
return true;
}
return false;
}
public LexicalChainLink getLink(Word word) {
for (LexicalChainLink link : links) {
if (link.getWord() == word)
return link;
}
return null;
}
private static final long serialVersionUID = -4724528858130546429L;
/**
* Applies the Floyd-Warshall algorithm to the lexical chain to determine
* the shortest distance between every pair of words in the chain.
*/
public void computeDistances() {
// initialize distanceMap structure
for (LexicalChainLink link : links) {
distanceMap.put(link, new HashMap<>());
}
for (LexicalChainLink link : links) {
for (Map.Entry<LexicalChainLink, Double> e : link.getConnections()
.entrySet()) {
// set distances between links that are directly connected
// the distance of an edge in the graph is the inverse of the
// weight in the disambiguation graph
setDistance(link, e.getKey(), (1.0 / e.getValue()));
}
}
private final Set<LexicalChainLink> links = new HashSet<>();
public boolean addLink(LexicalChainLink link) {
link.setLexicalChain(this);
return links.add(link);
}
// apply the Floyd-Warshall algorithm: link1 is the intermediate node
// through which a path link2 -> link3 may be shortened
for (LexicalChainLink link1 : links) {
    for (LexicalChainLink link2 : links) {
        for (LexicalChainLink link3 : links) {
            double direct = getDistance(link2, link3);
            double viaLink1 = getDistance(link2, link1) + getDistance(link1, link3);
            if (viaLink1 < direct) {
                setDistance(link2, link3, viaLink1);
            }
        }
    }
}
}
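For intuition, here is a minimal self-contained sketch of the same relaxation on a toy graph; the class and node names are illustrative only, not part of the codebase. Direct edges carry the inverted disambiguation-graph weights, and each pass tests whether routing i -> j through an intermediate node k beats the best distance found so far.

import java.util.HashMap;
import java.util.Map;

public class FloydWarshallSketch {
    public static void main(String[] args) {
        // Toy chain with three links A, B, C; direct edges A-B and B-C of distance 1.
        String[] nodes = {"A", "B", "C"};
        Map<String, Map<String, Double>> dist = new HashMap<>();
        for (String n : nodes) {
            dist.put(n, new HashMap<>());
            dist.get(n).put(n, 0.0);
        }
        dist.get("A").put("B", 1.0);
        dist.get("B").put("A", 1.0);
        dist.get("B").put("C", 1.0);
        dist.get("C").put("B", 1.0);

        // Relax every pair (i, j) through every intermediate node k.
        for (String k : nodes) {
            for (String i : nodes) {
                for (String j : nodes) {
                    double direct = dist.get(i).getOrDefault(j, Double.MAX_VALUE);
                    double viaK = dist.get(i).getOrDefault(k, Double.MAX_VALUE)
                            + dist.get(k).getOrDefault(j, Double.MAX_VALUE);
                    if (viaK < direct) {
                        dist.get(i).put(j, viaK);
                    }
                }
            }
        }
        // A and C are not directly connected, but A -> B -> C yields 2.0.
        System.out.println(dist.get("A").get("C")); // prints 2.0
    }
}

With chain distances precomputed this way, getDistance below becomes a constant-time map lookup, which is what the cohesion measures in this commit rely on.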
public double getDistance(LexicalChainLink link1, LexicalChainLink link2) {
HashMap<LexicalChainLink, Double> mapLink1 = distanceMap.get(link1);
Double distance = mapLink1.get(link2);
if (distance == null) {
return Double.MAX_VALUE;
}
return distance;
}
public boolean containsWord(Word word) {
for (LexicalChainLink link : links) {
if (link.getWord() == word) {
return true;
}
}
return false;
}
public void setDistance(LexicalChainLink link1, LexicalChainLink link2,
Double distance) {
distanceMap.get(link1).put(link2, distance);
}
public LexicalChainLink getLink(Word word) {
for (LexicalChainLink link : links) {
if (link.getWord() == word) {
return link;
}
}
return null;
}
@Override
public String toString() {
public String toString() {
Map<String, Integer> count = new HashMap<>();
for (LexicalChainLink link : links) {
String word = link.getWord().getLemma();
if (!count.containsKey(word)) count.put(word, 1);
else count.put(word, count.get(word) + 1);
if (!count.containsKey(word)) {
count.put(word, 1);
} else {
count.put(word, count.get(word) + 1);
}
}
List<String> entries = count.entrySet().stream()
.sorted((e1, e2) -> e2.getValue() - e1.getValue())
.map(e -> e.getKey() + "(" + e.getValue() + ")")
.collect(Collectors.toList());
return "(" + StringUtils.join(entries, ", ") + ")";
}
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result
+ ((distanceMap == null) ? 0 : distanceMap.hashCode());
result = prime * result + ((links == null) ? 0 : links.hashCode());
return result;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((links == null) ? 0 : links.hashCode());
return result;
}
public Set<LexicalChainLink> getLinks() {
return links;
}
public Set<LexicalChainLink> getLinks() {
return links;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
LexicalChain other = (LexicalChain) obj;
if (distanceMap == null) {
if (other.distanceMap != null)
return false;
} else if (!distanceMap.equals(other.distanceMap))
return false;
if (links == null) {
if (other.links != null)
return false;
} else if (!links.equals(other.links))
return false;
return true;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
LexicalChain other = (LexicalChain) obj;
if (links == null) {
if (other.links != null) {
return false;
}
} else if (!links.equals(other.links)) {
return false;
}
return true;
}
}
/*
* Copyright 2016 ReaderBench.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.readerbench.services.complexity.cohesion.lexical;
import com.readerbench.data.AbstractDocument;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
import com.readerbench.services.semanticModels.SimilarityType;
/**
*
* @author Stefan Ruseti
*/
public class AvgIntraSentenceLexicalCohesion extends LexicalCohesion {
public AvgIntraSentenceLexicalCohesion(SimilarityType simType) {
super(ComplexityIndicesEnum.AVERAGE_INTRA_SENTENCE_LEXICAL_COHESION);
}
@Override
public double compute(AbstractDocument d) {
return d.getSentencesInDocument().parallelStream()
.mapToDouble(s -> getIntraSentenceCohesion(s))
.average().orElse(0.);
}
}
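This compute method, like the two block-level indices later in the commit, follows a single stream-averaging shape. A self-contained sketch of the pattern on toy data (class name hypothetical), showing how orElse(0.) handles empty documents:

import java.util.Arrays;
import java.util.List;

public class StreamAverageSketch {
    public static void main(String[] args) {
        List<Double> perSentenceScores = Arrays.asList(0.25, 0.5, 0.75);
        double avg = perSentenceScores.parallelStream()
                .mapToDouble(Double::doubleValue)
                .average()       // OptionalDouble.empty() for an empty stream
                .orElse(0.);     // so a document with no sentences scores 0
        System.out.println(avg); // prints 0.5
    }
}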
/*
* Copyright 2016 ReaderBench.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.readerbench.services.complexity.cohesion.lexical;
import com.readerbench.data.AbstractDocument;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
import com.readerbench.services.semanticModels.SimilarityType;
/**
*
* @author Stefan Ruseti
*/
public class AvgLexicalBlockCohesion extends LexicalCohesion {
public AvgLexicalBlockCohesion(SimilarityType simType) {
super(ComplexityIndicesEnum.AVERAGE_LEXICAL_BLOCK_COHESION);
}
@Override
public double compute(AbstractDocument d) {
return d.getBlocks().parallelStream()
.filter(b -> b != null)
.mapToDouble(b -> getBlockCohesion(b))
.average().orElse(0.);
}
}
/*
* Copyright 2016 ReaderBench.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.readerbench.services.complexity.cohesion.lexical;
import com.readerbench.data.AbstractDocument;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
import com.readerbench.services.semanticModels.SimilarityType;
/**
*
* @author Stefan Ruseti
*/
public class AvgLexicalBlockCohesionAdjacentSentences extends LexicalCohesion {
public AvgLexicalBlockCohesionAdjacentSentences(SimilarityType simType) {
super(ComplexityIndicesEnum.AVERAGE_LEXICAL_BLOCK_COHESION_ADJACENT_SENTENCES);
}
@Override
public double compute(AbstractDocument d) {
return d.getBlocks().parallelStream()
.filter(b -> b != null)
.mapToDouble(b -> getBlockCohesionAdjacentSentences(b))
.average().orElse(0.);
}
}
\ No newline at end of file
/*
* Copyright 2016 ReaderBench.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.readerbench.services.complexity.cohesion.lexical;
import com.readerbench.data.Block;
import com.readerbench.data.Sentence;
import com.readerbench.data.Word;
import com.readerbench.services.complexity.ComplexityIndex;
import com.readerbench.services.complexity.ComplexityIndicesEnum;
import java.util.Iterator;
/**
*
* @author Stefan Ruseti
*/
public abstract class LexicalCohesion extends ComplexityIndex {
public LexicalCohesion(ComplexityIndicesEnum index) {
super(index);
}
protected static double getIntraSentenceCohesion(Sentence s) {
double distSum = 0;
for (int i = 0; i < s.getWords().size() - 1; i++) {
double minDist = Double.MAX_VALUE;
for (int j = i + 1; j < s.getWords().size(); j++) {
double d = s.getWords().get(i)
.getDistanceInChain(s.getWords().get(j));
if (d < minDist) {
minDist = d;
}
}
if (minDist != Double.MAX_VALUE) {
distSum += minDist;
}
}
double cohesion = (s.getWords().size() > 0 ? distSum
/ s.getWords().size() : 0);
return cohesion;
}
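A worked toy trace of the loops above: minima are taken only over later words (j > i), and the sum is normalized by the full sentence length. All distances and names below are made up for illustration.

public class IntraSentenceCohesionSketch {
    public static void main(String[] args) {
        // dist[i][j]: toy chain distance between word i and word j of one sentence
        double inf = Double.MAX_VALUE; // marks words that share no lexical chain
        double[][] dist = {
                {0.0, 2.0, 1.0},
                {2.0, 0.0, inf},
                {1.0, inf, 0.0}
        };
        double distSum = 0;
        int n = dist.length;
        for (int i = 0; i < n - 1; i++) {
            double minDist = inf;
            for (int j = i + 1; j < n; j++) {
                if (dist[i][j] < minDist) {
                    minDist = dist[i][j];
                }
            }
            if (minDist != inf) {
                distSum += minDist; // word 0 contributes min(2.0, 1.0) = 1.0
            }
        }
        // word 1's only later neighbor (word 2) shares no chain, so it adds nothing
        System.out.println(distSum / n); // 1.0 / 3 = 0.3333...
    }
}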
/**
* Cohesion within a block measured as the mean cohesion of adjacent
* sentences.
*/
public static double getBlockCohesionAdjacentSentences(Block b) {
double cohesionSum = 0;
Iterator<Sentence> it1 = b.getSentences().iterator();
Iterator<Sentence> it2 = b.getSentences().iterator();
// second iterator starts from second sentence
if (it2.hasNext()) {
it2.next();
}
while (it2.hasNext()) {
Sentence s1 = it1.next();
Sentence s2 = it2.next();
cohesionSum += getInterSentenceCohesion(s1, s2);
}
double denominator = b.getSentences().size() - 1;
double cohesion = (denominator > 0 ? cohesionSum / denominator : 0);
return cohesion;
}
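The two staggered iterators above pair each sentence with its successor, (s1, s2), (s2, s3), and so on, without index arithmetic. A minimal standalone illustration of that pairing (toy data, hypothetical class name):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class AdjacentPairsSketch {
    public static void main(String[] args) {
        List<String> sentences = Arrays.asList("s1", "s2", "s3", "s4");
        Iterator<String> it1 = sentences.iterator();
        Iterator<String> it2 = sentences.iterator();
        if (it2.hasNext()) {
            it2.next(); // offset the second iterator by one sentence
        }
        while (it2.hasNext()) {
            // prints s1 + s2, s2 + s3, s3 + s4: exactly size - 1 adjacent pairs
            System.out.println(it1.next() + " + " + it2.next());
        }
    }
}

Using two iterators keeps the pairing a single O(n) pass and avoids repeated get(i) calls, which matters if the sentence list is not random-access.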
/**
* Cohesion between two sentences, derived from lexical chain distances:
* distance(s1, s2) = SUM over w1 in s1 of (MIN over w2 in s2 of dist(w1, w2)),
* normalized by min(|s1|, |s2|); word pairs that share no lexical chain are skipped.
*/
protected static double getInterSentenceCohesion(Sentence s1, Sentence s2) {
double distSum = 0;
for (Word word1 : s1.getWords()) {
double minDist = Double.MAX_VALUE;
for (Word word2 : s2.getWords()) {
double d = word1.getDistanceInChain(word2);
if (d < minDist) {
minDist = d;
}
}
if (minDist != Double.MAX_VALUE) {
distSum += minDist;
}
}
int minSize = Math.min(s1.getWords().size(), s2.getWords().size());
return (minSize > 0 ? distSum / minSize : 0);
}
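A worked instance of the formula in the comment above, with made-up distances: s1 has two words and s2 has three, so the sum of per-row minima is divided by min(2, 3) = 2.

public class InterSentenceCohesionSketch {
    public static void main(String[] args) {
        // dist[i][j]: toy chain distance from word i of s1 to word j of s2
        double[][] dist = {
                {1.0, 3.0, 2.0}, // row minimum = 1.0
                {4.0, 0.5, 6.0}  // row minimum = 0.5
        };
        double distSum = 0;
        for (double[] row : dist) {
            double minDist = Double.MAX_VALUE;
            for (double d : row) {
                if (d < minDist) {
                    minDist = d;
                }
            }
            if (minDist != Double.MAX_VALUE) {
                distSum += minDist; // rows with no shared chain would be skipped
            }
        }
        int minSize = Math.min(dist.length, dist[0].length); // min(|s1|, |s2|) = 2
        System.out.println(distSum / minSize); // (1.0 + 0.5) / 2 = 0.75
    }
}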
protected static double getBlockCohesion(Block b) {
double interCohesionSum = 0;
double intraCohesionSum = 0;
for (int i = 0; i < b.getSentences().size() - 1; i++) {
for (int j = i + 1; j < b.getSentences().size(); j++) {
interCohesionSum += getInterSentenceCohesion(b.getSentences()
.get(i), b.getSentences().get(j));
}
}
for (int i = 0; i < b.getSentences().size(); i++) {
intraCohesionSum += getIntraSentenceCohesion(b.getSentences()
.get(i));
}
// average inter-sentence cohesion over all n * (n - 1) / 2 sentence pairs,
// then add the intra-sentence cohesion averaged over the n sentences
double denominator = (b.getSentences().size() - 1)
* b.getSentences().size() / 2;
double cohesion = (denominator > 0 ? interCohesionSum / denominator : 0);
denominator = b.getSentences().size();
cohesion += (denominator > 0 ? intraCohesionSum / denominator : 0);
return cohesion;
}
}
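The two denominators in getBlockCohesion can be checked by hand: n sentences yield n * (n - 1) / 2 unordered pairs for the inter-sentence term and n terms for the intra-sentence one. A toy verification with made-up sums (all values hypothetical):

public class BlockCohesionSketch {
    public static void main(String[] args) {
        int n = 3;                        // sentences in the block
        double interSum = 0.75;           // toy sum over pairs (0,1), (0,2), (1,2)
        double intraSum = 1.5;            // toy sum over the three sentences
        double pairs = (n - 1) * n / 2.0; // 3 unordered pairs
        double cohesion = interSum / pairs + intraSum / n; // 0.25 + 0.5
        System.out.println(cohesion);     // prints 0.75
    }
}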
@@ -146,14 +146,4 @@ public class DisambiguisationGraphAndLexicalChains {
}
}
/**
* Computes the word distances between the words in the lexical chains.
*/
public static void computeWordDistances(AbstractDocument d) {
LOGGER.info("Computing all lexical chains distances");
for (LexicalChain chain : d.getLexicalChains()) {
chain.computeDistances();
}
}
}
/*
* Copyright 2016 ReaderBench.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.readerbench.services.discourse.cohesion;
import com.readerbench.data.AbstractDocument;
import com.readerbench.data.Block;
import com.readerbench.data.Sentence;
import com.readerbench.data.Word;
import java.io.Serializable;
import java.util.Iterator;
public class LexicalCohesion implements Serializable {
private static final long serialVersionUID = -2519627724570229014L;
/**
* Cohesion within a single sentence: for each word, the minimal lexical chain
* distance to any later word in the sentence is accumulated, and the sum is
* normalized by the sentence length.
* @param s
*/
public static double getIntraSentenceCohesion(Sentence s) {
double distSum = 0;
for (int i = 0; i < s.getWords().size() - 1; i++) {
double minDist = Double.MAX_VALUE;
for (int j = i + 1; j < s.getWords().size(); j++) {
double d = s.getWords().get(i)
.getDistanceInChain(s.getWords().get(j));
if (d < minDist) {
minDist = d;
}
}
if (minDist != Double.MAX_VALUE) {
distSum += minDist;
}
}
double cohesion = (s.getWords().size() > 0 ? distSum
/ s.getWords().size() : 0);
System.out.println("Intra-utterance cohesion "
+ s.getContainer().getIndex() + "/" + s.getIndex() + ": "
+ cohesion);
return cohesion;
}
/**
* Cohesion between two sentences, derived from lexical chain distances:
* distance(s1, s2) = SUM over w1 in s1 of (MIN over w2 in s2 of dist(w1, w2)),
* normalized by min(|s1|, |s2|).
* @param s1
* @param s2
*/
public static double getInterSentenceCohesion(Sentence s1, Sentence s2) {
double distSum = 0;
for (Word word1 : s1.getWords()) {
double minDist = Double.MAX_VALUE;
for (Word word2 : s2.getWords()) {
double d = word1.getDistanceInChain(word2);
if (d < minDist) {
minDist = d;
}
}
if (minDist != Double.MAX_VALUE) {
distSum += minDist;
}
}
double cohesion = (Math.min(s1.getWords().size(), s2.getWords().size()) > 0 ? distSum
/ Math.min(s1.getWords().size(), s2.getWords().size())
: 0);
System.out.println("Inter-utterance cohesion "
+ s1.getContainer().getIndex() + "(" + s1.getIndex() + "-"
+ s2.getIndex() + "): " + cohesion);
return cohesion;
}
/**
* Cohesion within a block measured as the mean cohesion of adjacent
* sentences.
* @param b
*/
public static double getBlockCohesionAdjacentUtterances(Block b) {
double cohesionSum = 0;
Iterator<Sentence> it1 = b.getSentences().iterator();
Iterator<Sentence> it2 = b.getSentences().iterator();
// second iterator starts from second sentence
if (it2.hasNext()) {
it2.next();
}
while (it2.hasNext()) {
Sentence s1 = it1.next();
Sentence s2 = it2.next();
cohesionSum += getInterSentenceCohesion(s1, s2);
}
double denominator = b.getSentences().size() - 1;
double cohesion = (denominator > 0 ? cohesionSum / denominator : 0);
return cohesion;
}
public static double getBlockCohesion(Block b) {
double interCohesionSum = 0;
double intraCohesionSum = 0;
for (int i = 0; i < b.getSentences().size() - 1; i++) {
for (int j = i + 1; j < b.getSentences().size(); j++) {
interCohesionSum += getInterSentenceCohesion(b.getSentences()
.get(i), b.getSentences().get(j));
}
}
for (int i = 0; i < b.getSentences().size(); i++) {
intraCohesionSum += getIntraSentenceCohesion(b.getSentences().get(
i));
}
// add intra with inter-cohesion between utterances
double denominator = b.getSentences().size() - 1;
double cohesion = (denominator > 0 ? interCohesionSum / denominator : 0);
denominator = b.getSentences().size();
cohesion += (denominator > 0 ? intraCohesionSum / denominator : 0);
return cohesion;
}
/**
* Document cohesion computed as the sum of the cohesion of all non-null
* blocks, divided by the total number of blocks.
* @param d
*/
public static double getDocumentCohesion(AbstractDocument d) {
double cohesionSum = 0;
for (Block b : d.getBlocks()) {
if (b != null) {
// cohesionSum += getBlockCohesionAdjacentUtterances(b);
cohesionSum += getBlockCohesion(b);
}
}
double cohesion = (d.getBlocks().size() > 0 ? cohesionSum
/ (double) d.getBlocks().size() : 0);
return cohesion;
}
}