Commit 69e56f2e authored by ReaderBench

Elasticsearch version bump. Fixed communities processing via Akka.

parent 9754aad7
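In short, a Community is now constructed with its semantic models up front, and an eligible-contributions container is attached immediately afterwards. A minimal sketch of the new call pattern, pieced together from the call sites below (the "tasa" model name and the local variables are illustrative, not part of this commit):

    List<SemanticModel> models = SemanticModel.loadModels("tasa", lang);
    Community community = new Community(name, lang, models, startDate, endDate);
    // a synthetic Conversation that accumulates the community's eligible contributions
    community.setEligibleContributions(new Conversation(null, community.getSemanticModelsAsList(), community.getLanguage()));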
......@@ -80,6 +80,7 @@ public class CommunityProcessing {
community.setLastContributionDate(u.getTime());
}
Block.addBlock(participantToUpdate.getContributions(), b);
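// also register the contribution in the community-wide eligible-contributions document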
Block.addBlock(community.getEligibleContributions(), b);
if (b.isSignificant()) {
Block.addBlock(participantToUpdate.getSignificantContributions(), b);
}
......
......@@ -63,7 +63,9 @@ public class CommunityTimeProcessing {
}
private Community extractSubCommunity(Community community, Date startSubCommunities, Date endSubCommunities) {
Community subCommunity = new Community(community.getName(), community.getLanguage(), startSubCommunities, endSubCommunities);
Community subCommunity = new Community(community.getName(), community.getLanguage(), community.getSemanticModelsAsList(), startSubCommunities, endSubCommunities);
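// give the sub-community its own eligible-contributions container, built on the parent's semantic models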
subCommunity.setEligibleContributions(new Conversation(null, community.getSemanticModelsAsList(), community.getLanguage()));
for (Conversation c : community.getConversations()) {
subCommunity.getConversations().add(c);
}
......
......@@ -15,7 +15,10 @@
*/
package com.readerbench.coreservices.data.cscl;
import com.readerbench.coreservices.data.AbstractDocument;
import com.readerbench.coreservices.data.AnalysisElement;
import com.readerbench.coreservices.semanticmodels.SemanticModel;
import com.readerbench.coreservices.semanticmodels.SimilarityType;
import com.readerbench.datasourceprovider.pojo.Lang;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -31,13 +34,16 @@ public class Community extends AnalysisElement {
private String name;
private List<Participant> participants;
private List<Conversation> conversations;
private AbstractDocument eligibleContributions;
private List<Community> timeframeSubCommunities;
private double[][] participantContributions;
private Date startDate, endDate;
private Date firstContributionDate, lastContributionDate;
public Community(String name, Lang lang, Date startDate, Date endDate) {
public Community(String name, Lang lang, List<SemanticModel> models, Date startDate, Date endDate) {
super(null, 0, null, null, lang);
super.setSemanticModels(models);
this.name = name;
this.startDate = startDate;
this.endDate = endDate;
......@@ -117,4 +123,12 @@ public class Community extends AnalysisElement {
public void setParticipantContributions(double[][] participantContributions) {
this.participantContributions = participantContributions;
}
public AbstractDocument getEligibleContributions() {
return eligibleContributions;
}
public void setEligibleContributions(AbstractDocument eligibleContributions) {
this.eligibleContributions = eligibleContributions;
}
}
......@@ -62,12 +62,12 @@
<dependency>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>transport-netty4-client</artifactId>
<version>6.2.4</version>
<version>6.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.2.1</version>
<version>7.4.0</version>
</dependency>
<dependency>
......
package com.readerbench.datasourceprovider.elasticsearch;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchResponse;
......@@ -17,13 +18,13 @@ import org.json.simple.JSONObject;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Created by dorinela on 24.12.2017.
*/
......@@ -31,13 +32,13 @@ public class ElasticsearchService {
private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchService.class);
public static TransportClient client;
private static final String ELASTICSEARCH_HOST_ADDRESS = "141.85.232.48";
private static final String ELASTICSEARCH_HOST_ADDRESS = "127.0.0.1";
private static final Integer ELSTICSEARCH_PORT = 9300;
static {
try {
Settings settings = Settings.builder()
.put("cluster.name", "elasticsearch-readerbench").build();
.put("cluster.name", "elasticsearch").build();
client = new PreBuiltTransportClient(settings)
.addTransportAddress(new TransportAddress(InetAddress.getByName(ELASTICSEARCH_HOST_ADDRESS), ELSTICSEARCH_PORT));
} catch (Exception e) {
......@@ -153,8 +154,8 @@ public class ElasticsearchService {
// return result;
// }
public ArrayList<String> getAllGamesName() {
ArrayList<String> result = new ArrayList<>();
public static ArrayList<Game> getAllGames() {
ArrayList<Game> result = new ArrayList<>();
try {
QueryBuilder queryBuilder = QueryBuilders.matchAllQuery();
......@@ -169,7 +170,14 @@ public class ElasticsearchService {
Map<String, Object> data = searchHit.getSourceAsMap();
String name = data.get("name").toString();
result.add(name);
Integer metascore = Integer.valueOf(data.get("metascore").toString());
if(data.get("userScore") != null) {
Float userscore = Float.valueOf(data.get("userScore").toString());
Game game = new Game(review, metascore, userscore);
result.add(game);
}
}
} catch (Exception e) {
e.printStackTrace();
......@@ -178,6 +186,7 @@ public class ElasticsearchService {
return result;
}
public ArrayList<String> searchByField(String field, String value) {
ArrayList<String> result = new ArrayList<>();
try {
......@@ -201,13 +210,77 @@ public class ElasticsearchService {
return result;
}
public void writeReviewsToFile(String fileName, List<String> reviews) {
public Set<Review> searchByFieldWithScore(String field, String value) {
Set<Review> result = new LinkedHashSet<>();
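// wrap the value in literal quotes before the phrase query on the game title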
String newValue = "\"" + value + "\"";
try {
SearchResponse response = client.prepareSearch("reviews")
.setTypes("metacritic")
//.setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
.setSize(10000)
.setQuery(QueryBuilders.matchPhraseQuery(field, newValue))
//.setQuery(QueryBuilders.termQuery(field, value))
.get();
SearchHit[] searchHits = response.getHits().getHits();
for (SearchHit searchHit : searchHits) {
Map<String, Object> data = searchHit.getSourceAsMap();
String review = data.get("review").toString();
Integer score = Integer.valueOf(data.get("rate").toString());
Review r = new Review(review, score);
result.add(r);
}
} catch (Exception e) {
e.printStackTrace();
}
return result;
}
/**
* For Alina's experiments.
* @param fileName
* @param reviews
*/
public void writeReviewsToFile(String fileName, Set<Review> reviews) {
try {
BufferedWriter writer = new BufferedWriter(new FileWriter(fileName));
// Write each review as score, a tab, then the review text.
for (Review review : reviews) {
writer.write(review.getScore() + "\t" + review.getReview());
writer.newLine();
}
writer.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public void writeReviewsScoreToFile(String fileName, List<Review> reviews) {
try {
BufferedWriter writer = new BufferedWriter(new FileWriter(fileName));
// Write each review score on its own line.
for (Review review : reviews) {
writer.write(review.getScore().toString());
writer.newLine();
}
writer.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public void writeMetascoreGamesToFile(String fileName, List<Game> games) {
try {
BufferedWriter writer = new BufferedWriter(new FileWriter(fileName));
// Write each game's metascore on its own line.
for (String line : reviews) {
writer.write(line);
for (Game game : games) {
writer.write(game.getMetascore().toString());
writer.newLine();
}
writer.close();
......@@ -217,24 +290,72 @@ public class ElasticsearchService {
}
public void writeUserscoreGamesToFile(String fileName, List<Game> games) {
try {
BufferedWriter writer = new BufferedWriter(new FileWriter(fileName));
// Write each game's userscore on its own line.
for (Game game : games) {
writer.write(game.getUserscore().toString());
writer.newLine();
}
writer.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
//writeMetaAndUserScore();
writeReviewsForAllGamesToFile();
}
private static void writeReviewsForAllGamesToFile() {
ElasticsearchService elasticsearchService = new ElasticsearchService();
List<Game> games = getAllGames();
System.out.println("Total number of games are: " + games.size());
Set<Review> allReviews = new HashSet<>();
for (Game game : games) {
Set<Review> reviews = elasticsearchService.searchByFieldWithScore("game", game.getName());
System.out.println("Write " + reviews.size() + " reviews for game " + game.getName());
if (reviews != null && reviews.size() > 0) {
// for (Review review : reviews) {
// if (!allReviews.contains(review)) {
// allReviews.add(review);
// }
// }
allReviews.addAll(reviews);
}
}
elasticsearchService.writeReviewsToFile("C:\\Users\\Administrator\\ownCloud\\ReaderBench\\in\\Metacritic reviews\\reviews-09-july-2018\\metacritic-reviews.txt", allReviews);
System.out.println("-------Finish---------");
}
private static void writeMetaAndUserScore() {
ElasticsearchService elasticsearchService = new ElasticsearchService();
List<String> games = elasticsearchService.getAllGamesName();
List<Game> games = elasticsearchService.getAllGames();
List<Game> remainedGames = new ArrayList<>();
System.out.println("Total number of games: " + games.size());
for (String game : games) {
ArrayList<String> reviews = elasticsearchService.searchByField("game", game);
System.out.println("Total Number of reviews for game " + game + " are " + reviews.size());
for (Game game : games) {
Set<Review> reviews = elasticsearchService.searchByFieldWithScore("game", game.getName());
System.out.println("Total Number of reviews for game " + game.getName() + " are " + reviews.size());
if (reviews != null && !reviews.isEmpty()) {
String gameName = game.getName().replaceAll(":", "")
.replaceAll("-", "").replaceAll("\\?", "")
.replaceAll("!", "").replaceAll("/", "")
.replaceAll("\'", "");
elasticsearchService.writeReviewsToFile("C:\\Users\\Administrator\\Desktop\\projects\\resources\\reviews\\" + gameName + ".txt", reviews);
remainedGames.add(game);
}
}
elasticsearchService.writeMetascoreGamesToFile("C:\\Users\\Administrator\\ownCloud\\ReaderBench\\in\\Metacritic reviews\\reviews-08-july-2018\\games-metascore.txt", remainedGames);
elasticsearchService.writeUserscoreGamesToFile("C:\\Users\\Administrator\\ownCloud\\ReaderBench\\in\\Metacritic reviews\\reviews-08-july-2018\\games-userscore.txt", remainedGames);
}
}
package com.readerbench.datasourceprovider.elasticsearch;
public class Game {
String name;
Integer metascore;
Float userscore;
public Game(String name, Integer metascore, Float userscore) {
this.name = name;
this.metascore = metascore;
this.userscore = userscore;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public Integer getMetascore() {
return metascore;
}
public void setMetascore(Integer metascore) {
this.metascore = metascore;
}
public Float getUserscore() {
return userscore;
}
public void setUserscore(Float userscore) {
this.userscore = userscore;
}
}
package com.readerbench.datasourceprovider.elasticsearch;
import java.util.Objects;
public class Review {
String review;
Integer score;
public Review(String review, Integer score) {
this.review = review;
this.score = score;
}
public String getReview() {
return review;
}
public void setReview(String review) {
this.review = review;
}
public Integer getScore() {
return score;
}
public void setScore(Integer score) {
this.score = score;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Review review1 = (Review) o;
return Objects.equals(review, review1.review) &&
Objects.equals(score, review1.score);
}
@Override
public int hashCode() {
return Objects.hash(review, score);
}
}
......@@ -22,5 +22,5 @@ public class ConversationActorSystem {
public static final ActorRef PROCESSING_WORKER = ACTOR_SYSTEM.actorOf(ConversationWorkerActor.props(NUMBER_OF_WORKERS_ACTORS), "convert-document-worker");
public static final ActorRef PROCESSING_MASTER = ACTOR_SYSTEM.actorOf(ConversationMasterActor.props(NUMBER_OF_MASTER_ACTORS), "convert-document-master");
public static final long TIMEOUT = 1000 * 1000l;
public static final long TIMEOUT = 20000000;
}
......@@ -29,7 +29,7 @@
<weka-dev.version>3.9.1</weka-dev.version>
<svm.version>3.22</svm.version>
<elasticsearch.version>6.2.4</elasticsearch.version>
<elasticsearch.version>6.4.0</elasticsearch.version>
<jackson-databind.version>2.8.8</jackson-databind.version>
<libthrift.version>0.10.0</libthrift.version>
......
......@@ -24,6 +24,7 @@ import com.readerbench.coreservices.data.cscl.Participant;
import com.readerbench.coreservices.data.Block;
import com.readerbench.coreservices.data.Word;
import com.readerbench.coreservices.semanticmodels.SemanticModel;
import com.readerbench.coreservices.semanticmodels.SimilarityType;
import com.readerbench.datasourceprovider.pojo.Lang;
import com.readerbench.processingservice.Annotators;
import java.util.Date;
......@@ -49,8 +50,9 @@ public class CommunityProcessingPipeline extends ConversationProcessingPipeline
super(lang, models, annotators);
}
public Community createCommunityFromConversations(String name, List<Conversation> conversations, Date startDate, Date endDate) {
Community community = new Community(name, getLanguage(), startDate, endDate);
public Community createCommunityFromConversations(String name, List<Conversation> conversations, List<SemanticModel> models, Date startDate, Date endDate) {
Community community = new Community(name, getLanguage(), models, startDate, endDate);
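// attach a synthetic Conversation that will collect the community's eligible contributions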
community.setEligibleContributions(new Conversation(null, community.getSemanticModelsAsList(), community.getLanguage()));
community.setName(name);
for (Conversation c : conversations) {
......
......@@ -108,13 +108,27 @@ public class ProcessDocumentCollection {
}
public static void main(String[] args) {
Lang lang = Lang.en;
/*Lang lang = Lang.en;
List<SemanticModel> models = SemanticModel.loadModels("tasa", lang);
String path = "../resources/in/essays/all essays";
Txt2XmlConverter converter = new Txt2XmlConverter(lang);
converter.parseTxtFiles(path, lang, "UTF-8", false);
ProcessDocumentCollection processing = new ProcessDocumentCollection();
processing.processTexts(path, models, lang, false);*/
processQuantitativeAnalysis();
}
private static void processQuantitativeAnalysis() {
Lang lang = Lang.en;
List<SemanticModel> models = SemanticModel.loadModels("coca", lang);
String path = "C:\\Users\\Administrator\\Nextcloud\\ReaderBench\\in\\NATO\\Corpus analiza cantitativa - v5\\integrative nou 1991-2018";
Txt2XmlConverter converter = new Txt2XmlConverter(lang);
converter.parseTxtFiles(path, lang, "UTF-8", false);
ProcessDocumentCollection processing = new ProcessDocumentCollection();
processing.processTexts(path, models, lang, false);
}
......
......@@ -15,13 +15,22 @@
*/
package com.readerbench.processingservice.exportdata;
import com.readerbench.coreservices.cna.extendedcna.distancestrategies.AuthorDistanceStrategyType;
import com.readerbench.coreservices.data.AbstractDocument;
import com.readerbench.coreservices.data.cscl.CSCLIndices;
import com.readerbench.coreservices.data.cscl.Community;
import com.readerbench.coreservices.data.cscl.Participant;
import com.readerbench.coreservices.data.discourse.SemanticCohesion;
import com.readerbench.coreservices.keywordmining.Keyword;
import com.readerbench.coreservices.keywordmining.KeywordModeling;
import com.readerbench.datasourceprovider.commons.Formatting;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.slf4j.Logger;
......@@ -198,4 +207,197 @@ public class ExportCommunityToES {
return participantsStats;
}
/**
* Get data for the trend chart for the entire community
*
* @return
*/
public List<Map<String, Object>> getContributionsForTrend() {
List<Map<String, Object>> communityResult = new ArrayList<>();
for (Community subCommunity : community.getTimeframeSubCommunities()) {
List<Double> subcommunityValues = new ArrayList<>();
for (int index = 0; index < subCommunity.getParticipants().size(); index++) {
Participant p = subCommunity.getParticipants().get(index);
if (p.getContributions().getNoBlocks() > 0) {
Double score = p.getIndices().get(CSCLIndices.SCORE);
subcommunityValues.add(score);
}
}
subcommunityValues.sort(Comparator.naturalOrder());
Map<String, Object> subcommunityResult = new HashMap<>();
Date startDate = subCommunity.getStartDate() != null ? subCommunity.getStartDate() : subCommunity.getFistContributionDate();
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
subcommunityResult.put("date", dateFormat.format(startDate));
DecimalFormat df = new DecimalFormat(".##");
subcommunityResult.put("pct05", Double.valueOf(df.format(subcommunityValues.get(Math.min((int) Math.round(.05 * subcommunityValues.size()),subcommunityValues.size()-1)))));
subcommunityResult.put("pct25", Double.valueOf(df.format(subcommunityValues.get(Math.min((int) Math.round(.25 * subcommunityValues.size()),subcommunityValues.size()-1)))));
subcommunityResult.put("pct50", Double.valueOf(df.format(subcommunityValues.get(Math.min((int) Math.round(.50 * subcommunityValues.size()),subcommunityValues.size()-1)))));
subcommunityResult.put("pct75", Double.valueOf(df.format(subcommunityValues.get(Math.min((int) Math.round(.75 * subcommunityValues.size()),subcommunityValues.size()-1)))));
subcommunityResult.put("pct95", Double.valueOf(df.format(subcommunityValues.get(Math.min((int) Math.round(.95 * subcommunityValues.size()),subcommunityValues.size()-1)))));
communityResult.add(subcommunityResult);
}
return communityResult;
}
/**
* Get data for the timeline evolution of global participation
* @return
*/
public List<Map<String, Object>> getGlobalTimelineEvolution() {
List<Map<String, Object>> communityResult = new ArrayList<>();
for (Community subCommunity : community.getTimeframeSubCommunities()) {
Double density = 0d;
int noParticipants = 0;
if (subCommunity.getEligibleContributions().getNoBlocks() > 0) {
for (Participant participant : subCommunity.getParticipants()) {
if (participant.getContributions().getNoBlocks() > 0) {
noParticipants++;
}
}
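// count interacting participant pairs in the upper triangle (diagonal excluded), then normalize by n * (n - 1)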
for (int row = 0; row < subCommunity.getParticipantContributions().length; row++) {
for (int col = row; col < subCommunity.getParticipantContributions()[row].length; col++) {
if (row != col && subCommunity.getParticipantContributions()[row][col] > 0) {
density++;
}
}
}
if (subCommunity.getEligibleContributions().getNoBlocks() == 1 && noParticipants == 1) {
density = 0.0;
} else {
density = density / (noParticipants * (noParticipants - 1));
}
}
Map<String, Object> subcommunityResult = new HashMap<>();
Date startDate = subCommunity.getStartDate() != null ? subCommunity.getStartDate() : subCommunity.getFistContributionDate();
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
subcommunityResult.put("date", dateFormat.format(startDate));
subcommunityResult.put("participants", noParticipants);
subcommunityResult.put("contributions", subCommunity.getEligibleContributions().getNoBlocks());
subcommunityResult.put("density", density);
communityResult.add(subcommunityResult);
}
return communityResult;
}
public Map<String, List<Integer>> getKeywordsSimilarity(double threshold, int maxTimeframeTopics) {
AbstractDocument eligibleContributions = community.getEligibleContributions();
eligibleContributions.determineWordOccurences(eligibleContributions.getBlocks());
eligibleContributions.determineSemanticDimensions();
KeywordModeling.determineKeywords(eligibleContributions, false);
// keep at most maxTimeframeTopics keywords, restricted to nouns and verbs
List<Keyword> keywords = KeywordModeling.getSublist(eligibleContributions.getTopics(),
maxTimeframeTopics,
true,
true);
Map<String, List<Integer>> keywordsResult = new HashMap<>();
for (Keyword t1 : keywords) {
List<Integer> row = new ArrayList<>();
System.out.print(t1.getWord().getLemma() + " : ");
for (Keyword t2 : keywords) {
int value = 0;
if (!t1.equals(t2)) {
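// each 0.1 of similarity above the threshold maps to one unit of heat-map intensity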
double sim = SemanticCohesion.getAverageSemanticModelSimilarity(t1.getElement(), t2.getElement());
if (sim > threshold) {
value = (int) Math.ceil((sim - threshold) * 10);
row.add(value);
} else {
row.add(0);
}
} else {
row.add(0);
}
System.out.print(value + ", ");
}
System.out.println("\n");
keywordsResult.put(t1.getWord().getLemma(), row);
}
return keywordsResult;
}
/**
* Builds keywords for the heat map. Only for the entire community (week = 0).
* @param week
* @return
*/
public JSONObject buildKeywordsForHeapMap(Integer week, Community community) {
LOGGER.info("Write keywords to Elasticsearch");
JSONObject result = new JSONObject();
result.put("communityName", community.getName());
result.put("week", week);
Date startDate = community.getStartDate() != null ? community.getStartDate() : community.getFistContributionDate();
Date endDate = community.getEndDate() != null ? community.getEndDate() : community.getLastContributionDate();
result.put("startDate", startDate);
result.put("endDate", endDate);
Map<String, double[]> keywords = exportDiscussedTopicsPerTimeframe(10, community);
Map<String, List<Double>> finalResult = new HashMap<>();
keywords.forEach((k,v)->{
List<Double> values = new ArrayList<>();