projects
/
pnews.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
e93f5e6
)
added support of entity aliases
author
Jean-Philippe Orsini
<orsinije@fr.ibm.com>
Sat, 4 Nov 2017 23:13:41 +0000
(
00:13
+0100)
committer
Jean-Philippe Orsini
<orsinije@fr.ibm.com>
Sat, 4 Nov 2017 23:13:41 +0000
(
00:13
+0100)
war/src/main/java/pnews/NER.java
patch
|
blob
|
history
war/src/main/java/pnews/OpenNLP.java
patch
|
blob
|
history
war/src/main/java/pnews/servlet/Config.java
patch
|
blob
|
history
war/src/main/resources/feeds.json
patch
|
blob
|
history
diff --git
a/war/src/main/java/pnews/NER.java
b/war/src/main/java/pnews/NER.java
index
2055cf1
..
5e7ce29
100644
(file)
--- a/
war/src/main/java/pnews/NER.java
+++ b/
war/src/main/java/pnews/NER.java
@@
-33,7
+33,7
@@
public class NER {
for (Triple<String, Integer, Integer> t: triples) {
w = str.substring(t.second, t.third);
if (!config.isBlacklistedEntity(w) && !entities.contains(w))
for (Triple<String, Integer, Integer> t: triples) {
w = str.substring(t.second, t.third);
if (!config.isBlacklistedEntity(w) && !entities.contains(w))
- entities.add(
w
);
+ entities.add(
config.getEntityAlias(w)
);
}
LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
}
LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
diff --git
a/war/src/main/java/pnews/OpenNLP.java
b/war/src/main/java/pnews/OpenNLP.java
index
c383cee
..
e158a00
100644
(file)
--- a/
war/src/main/java/pnews/OpenNLP.java
+++ b/
war/src/main/java/pnews/OpenNLP.java
@@
-97,7
+97,7
@@
public class OpenNLP {
LOG.finest(entity + " " + s.getProb() + " " + s.toString());
if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
LOG.finest(entity + " " + s.getProb() + " " + s.toString());
if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
- entities.add(
entity
);
+ entities.add(
config.getEntityAlias(entity)
);
}
return entities;
}
return entities;
diff --git
a/war/src/main/java/pnews/servlet/Config.java
b/war/src/main/java/pnews/servlet/Config.java
index
fec3770
..
46e75f6
100644
(file)
--- a/
war/src/main/java/pnews/servlet/Config.java
+++ b/
war/src/main/java/pnews/servlet/Config.java
@@
-28,6
+28,7
@@
public class Config {
private Category[] categories;
private Language[] languages;
private final Set<String> blacklistedEntities = new HashSet<>();
private Category[] categories;
private Language[] languages;
private final Set<String> blacklistedEntities = new HashSet<>();
+ private final HashMap<String, String> entityAliases = new HashMap<>();
private static final String CLASS_NAME = Config.class.getName();
/**
private static final String CLASS_NAME = Config.class.getName();
/**
@@
-83,13
+84,13
@@
public class Config {
}
private void loadEntities(JsonObject jroot) {
}
private void loadEntities(JsonObject jroot) {
- JsonObject jentities;
+ JsonObject jentities
, jaliases
;
JsonArray jblacklist;
final String METHOD_NAME = "loadEntities";
jentities = jroot.getJsonObject("entities");
JsonArray jblacklist;
final String METHOD_NAME = "loadEntities";
jentities = jroot.getJsonObject("entities");
- jblacklist = jentities.getJsonArray("blacklist");
+ jblacklist = jentities.getJsonArray("blacklist");
jblacklist.forEach((jv)-> {
JsonString js;
jblacklist.forEach((jv)-> {
JsonString js;
@@
-97,7
+98,28
@@
public class Config {
blacklistedEntities.add(js.getString());
});
blacklistedEntities.add(js.getString());
});
+ jaliases = jentities.getJsonObject("aliases");
+ jaliases.forEach((k, v)-> {
+ JsonArray jsources = (JsonArray)v;
+
+ jsources.forEach((jsource)-> {
+ entityAliases.put(((JsonString)jsource).getString(), k);
+ });
+ });
+
LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " blacklistedEntities=" + blacklistedEntities);
LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " blacklistedEntities=" + blacklistedEntities);
+ LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " entityAliases=" + entityAliases);
+ }
+
+ public String getEntityAlias(String entity) {
+ String result;
+
+ result = entityAliases.get(entity);
+
+ if (result == null)
+ return entity;
+ else
+ return result;
}
public void loadConfig() throws UnsupportedEncodingException {
}
public void loadConfig() throws UnsupportedEncodingException {
diff --git
a/war/src/main/resources/feeds.json
b/war/src/main/resources/feeds.json
index
7ec9e49
..
337e19a
100644
(file)
--- a/
war/src/main/resources/feeds.json
+++ b/
war/src/main/resources/feeds.json
@@
-146,6
+146,9
@@
"Digital Trends",
"Joey Sneddon",
"CA"
"Digital Trends",
"Joey Sneddon",
"CA"
- ]
+ ],
+ "aliases": {
+ "U.S.": ["United States", "US"]
+ }
}
}
}
}