Commit 9d59b403 authored by Teodor Mihai Cotet

Added the correct diacritics for ro (comma-below ș and ț, alongside the cedilla forms ş and ţ); look up lowercased words in the dictionary - ro

parent 69e56f2e
......@@ -235,8 +235,8 @@ public class Word extends AnalysisElement implements Comparable<Word>, Serializa
if (this.getText().length() > 1
&& !StopWords.isStopWord(this.getText(), getLanguage())
&& !StopWords.isStopWord(this.getLemma(), getLanguage())
&& (Dictionary.isDictionaryWord(this.getText(), getLanguage())
|| Dictionary.isDictionaryWord(this.getLemma(), getLanguage()))) {
&& (Dictionary.isDictionaryWord(this.getText().toLowerCase(), getLanguage())
|| Dictionary.isDictionaryWord(this.getLemma().toLowerCase(), getLanguage()))) {
if (this.getPOS() != null) {
return this.getPOS().equals("NN") || this.getPOS().equals("VB") || this.getPOS().equals("JJ") || this.getPOS().equals("RB");
}
......
......@@ -89,7 +89,7 @@ public class TextPreprocessing {
pattern = "[a-zàâäæçéêèëîïôœùûü]+";
break;
case ro:
pattern = "[a-zăâîşţ]+";
pattern = "[a-zăâîşţșț]+";
break;
case es:
pattern = "[a-zñóéíáúü]+";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment