Driver function to load data from song and event log files into a Postgres database. :param cur: a database cursor reference :param conn: database connection reference :param filepath: parent directory where the files exist :param func: function to call
(cur, conn, filepath, func)
| 80 | |
| 81 | |
def process_data(cur, conn, filepath, func):
    """
    Driver function to load data from song and event log files into a Postgres database.

    Walks ``filepath`` recursively, collects every entry matching ``*.json``,
    then calls ``func(cur, datafile)`` for each one and commits on ``conn``
    after each call.

    :param cur: a database cursor reference, passed through to ``func``
    :param conn: database connection reference; ``commit()`` is called after each file
    :param filepath: parent directory where the files exist
    :param func: function to call as ``func(cur, datafile)`` for each file found
    """
    # get all files matching extension from directory
    all_files = []
    for root, _dirs, _files in os.walk(filepath):
        # Escape the directory part so glob metacharacters that happen to be
        # in a directory name ("[", "]", "*", "?") are matched literally;
        # otherwise files under such directories are silently skipped.
        pattern = os.path.join(glob.escape(root), '*.json')
        all_files.extend(os.path.abspath(f) for f in glob.glob(pattern))

    # get total number of files found
    num_files = len(all_files)
    print('{} files found in {}'.format(num_files, filepath))

    # iterate over files and process; commit per file so each file's
    # work is persisted before moving on to the next one
    for i, datafile in enumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
        print('{}/{} files processed.'.format(i, num_files))
| 106 | |
| 107 | |
| 108 | def main(): |