Downloads commonsense data from http://nodebox.net/perception. Saves the data as commonsense.csv which can be the input for Commonsense.load().
(path=os.path.join(MODULE, "commonsense.csv"), threshold=50)
| 213 | #--- NODEBOX.NET/PERCEPTION ------------------------------------------------------------------------ |
| 214 | |
def download(path=os.path.join(MODULE, "commonsense.csv"), threshold=50):
    """ Downloads commonsense data from http://nodebox.net/perception.
        Saves the data as commonsense.csv which can be the input for Commonsense.load().

        path      : file to which the CSV export is written (overwritten if it exists).
        threshold : minimum number of contributions an author must have
                    before his or her relations can define new relations and context.

        Each exported row has the form:
        "concept1","relation","concept2","context",weight
        The file starts with a UTF-8 byte order mark and is encoded as UTF-8.
        Raises ValueError if a weight field in the downloaded data is not an integer.
    """
    s = "http://nodebox.net/perception?format=txt&robots=1"
    s = urlopen(s).read()
    s = s.decode("utf-8")
    s = s.replace("\\'", "'")
    # Group relations by author.
    # A valid line has 7 fields separated by ", ":
    # concept1, relation, concept2, context, weight, author, date.
    # Lines with any other number of fields are ignored.
    authors = {}
    for line in s.split("\n"):
        fields = [v.strip("'") for v in line.split(", ")]
        if len(fields) == 7:
            authors.setdefault(fields[-2], []).append(fields)
    # Iterate authors sorted by number of contributions (most first).
    # 1) Authors with 50+ contributions can define new relations and context.
    # 2) Authors with 50- contributions (or robots) can only reinforce existing relations.
    # key= with reverse=True is stable and works on both Python 2 and 3
    # (the cmp= argument no longer exists in Python 3).
    authors = sorted(authors.items(), key=lambda item: len(item[1]), reverse=True)
    r = {}
    for author, relations in authors:
        if author == "" or author.startswith("robots@"):
            continue
        if len(relations) < threshold:
            # Authors are sorted by contribution count,
            # so every remaining author is below the threshold too.
            break
        # Sort latest-first (we prefer more recent relation types).
        # NOTE(review): assumes the date field sorts chronologically
        # when compared as a string (e.g., ISO-formatted) - confirm.
        relations = sorted(relations, key=lambda fields: fields[-1], reverse=True)
        # 1) Define new relations.
        # The first context encountered for a relation is kept.
        # NOTE(review): an empty context string is kept as well - confirm this is intended.
        for concept1, relation, concept2, context, _weight, _author, _date in relations:
            key = (concept1, relation, concept2)
            if key not in r:
                r[key] = [context, 0]
    for _author, relations in authors:
        # 2) Reinforce existing relations.
        # Every author (robots and anonymous included) adds weight,
        # but only to relations defined in step 1.
        for concept1, relation, concept2, _context, weight, _author, _date in relations:
            key = (concept1, relation, concept2)
            if key in r:
                r[key][1] += int(weight)
    # Export CSV-file.
    rows = []
    for (concept1, relation, concept2), (context, weight) in r.items():
        rows.append("\"%s\",\"%s\",\"%s\",\"%s\",%s" % (
            concept1, relation, concept2, context, weight))
    # The BOM and the encoded rows are bytes, so the file is opened in binary mode.
    # The with-statement closes the file even if writing fails.
    with open(path, "wb") as f:
        f.write(BOM_UTF8)
        f.write("\n".join(rows).encode("utf-8"))
| 262 | |
| 263 | def json(): |
| 264 | """ Returns a JSON-string with the data from commonsense.csv. |
nothing calls this directly
no test coverage detected
searching dependent graphs…