MCP · copy · Index your code
hub / github.com/clips/pattern / download

Function download

pattern/graph/commonsense.py:215–261  ·  view source on GitHub ↗

Downloads commonsense data from http://nodebox.net/perception. Saves the data as commonsense.csv which can be the input for Commonsense.load().

(path=os.path.join(MODULE, "commonsense.csv"), threshold=50)

Source from the content-addressed store, hash-verified

213#--- NODEBOX.NET/PERCEPTION ------------------------------------------------------------------------
214
def download(path=os.path.join(MODULE, "commonsense.csv"), threshold=50):
    """ Downloads commonsense data from http://nodebox.net/perception.
        Saves the data as commonsense.csv which can be the input for Commonsense.load().

        Each line of the downloaded text is split on ", " and is only used if it yields
        exactly 7 fields: concept1, relation, concept2, context, weight, author, date.

        Authors with at least `threshold` contributions (excluding the empty author and
        "robots@..." authors) define which relations exist and what their context is.
        The weights of all authors are then summed for those relations.

        The output is a UTF-8 file with a byte order mark, one relation per line:
        "concept1","relation","concept2","context",weight

        Raises whatever urlopen() raises on network failure,
        and ValueError if a weight field is not an integer.
    """
    url = "http://nodebox.net/perception?format=txt&robots=1"
    response = urlopen(url)
    try:
        s = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    s = s.decode("utf-8")
    s = s.replace("\\'", "'")
    # Group relations by author (the second-to-last field).
    by_author = {}
    for line in s.split("\n"):
        fields = [v.strip("'") for v in line.split(", ")]
        if len(fields) == 7:
            by_author.setdefault(fields[-2], []).append(fields)
    # Iterate authors sorted by number of contributions (most first).
    # 1) Authors with at least `threshold` contributions can define new relations and context.
    # 2) Authors with fewer contributions (or robots) can only reinforce existing relations.
    # A key function is used instead of cmp=, which sorted() no longer accepts in Python 3.
    ranked = sorted(by_author.items(), key=lambda item: len(item[1]), reverse=True)
    merged = {}
    for author, relations in ranked:
        if author == "" or author.startswith("robots@"):
            continue
        if len(relations) < threshold:
            # Authors are sorted by contribution count, so all remaining ones are below threshold too.
            break
        # Sort latest-first (we prefer more recent relation types).
        # NOTE(review): this compares the date field as a string,
        # which assumes a lexicographically sortable date format - confirm.
        relations = sorted(relations, key=lambda fields: fields[-1], reverse=True)
        # 1) Define new relations.
        for concept1, relation, concept2, context, _weight, _author, _date in relations:
            key = (concept1, relation, concept2)
            if key not in merged:
                # The first context seen wins; the weight is accumulated in the next pass.
                merged[key] = [context, 0]
    for _author, relations in ranked:
        # 2) Reinforce existing relations.
        for concept1, relation, concept2, _context, weight, _by, _date in relations:
            key = (concept1, relation, concept2)
            if key in merged:
                merged[key][1] += int(weight)
    # Export CSV-file.
    # NOTE(review): fields are written verbatim; double quotes inside a field are not escaped.
    rows = []
    for (concept1, relation, concept2), (context, weight) in merged.items():
        rows.append("\"%s\",\"%s\",\"%s\",\"%s\",%s" % (
            concept1, relation, concept2, context, weight))
    # Binary mode: both the BOM and the encoded rows are bytes.
    with open(path, "wb") as f:
        f.write(BOM_UTF8)
        f.write("\n".join(rows).encode("utf-8"))
262
263def json():
264 """ Returns a JSON-string with the data from commonsense.csv.

Callers

nothing calls this directly

Calls 11

len Function · 0.85
strip Method · 0.80
write Method · 0.80
read Method · 0.45
decode Method · 0.45
split Method · 0.45
append Method · 0.45
setdefault Method · 0.45
items Method · 0.45
encode Method · 0.45
close Method · 0.45

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…