import pandas as pd
import numpy as np
import sys
initial = open('/Users/Sven/py_files/aoc_2017/d7_input.txt')
initial = initial.readlines()
initial[:5]
Okay that seems like we can work with it. Now I think I'll want to compile this into
a DataFrame
since that will be easier to work with. Maybe we can write a function to format the input into three columns and then return a data frame.
def fmt_block(txt):
# break off stuff after an arrow
txt = txt.split('->')
if len(txt) == 2:
dependencies = txt[1]
else:
dependencies = ''
dependencies = dependencies[:-1]
dependencies = dependencies.replace(', ', ' ')
left = txt[0]
# get the name
name = left[:left.find(' ')]
# Now get the weight:
wt = left[(left.find('(') + 1):left.find(')')]
wt = int(wt)
dat = pd.DataFrame({'name' : [name], 'weight' : [wt], 'dependencies' : [dependencies]})
return(dat)
print(initial[1])
fmt_block(initial[1])
Sweet, now we just need to run this across the whole thing and concatinate the data sets
tower = [fmt_block(x) for x in initial]
tower = pd.concat(tower).reset_index(drop = True)
tower['removed'] = 0
tower.head()
tower_save = tower.copy()
Well this is going swimmingly! Now we're going to have to do some kind of iterative process on the data set where we look for terminal nodes, go through the list, and remove that node from any dependencies.
def rmv_node():
global tower
# Find the node to remove
node_removing = tower.loc[(tower.dependencies == '') & (tower.removed == 0), 'name']
node_removing = node_removing.index[0]
tower.loc[node_removing, 'removed'] = 1
node_removing = tower.name[node_removing]
# Format the regular expression to remove:
node_removing = '(^| )' + node_removing + '($| )'
# Replace it from any dependencies
tower['dependencies'] = tower.dependencies.replace(node_removing, ' ', regex = True)
tower['dependencies'] = tower.dependencies.str.strip()
Now we just keep doing this until they're all gone but one
i = 0
while (sum(tower.removed) + 1) < len(tower):
rmv_node()
i += 1
if i % 100 == 0:
print(i)
tower.removed.value_counts()
tower[tower.removed == 0]
Now we have to make a change to the program where we look for 'balance' among the subtrees' weights. It seems like the hardest part of this will be finding nodes where all their dependencies are terminal branches...
tower_save['branches'] = tower_save.dependencies.str.strip()
tower_save['branches'] = tower_save.branches.str.count(' ') + 1
tower_save['total_weight'] = tower_save['weight']
tower_save_again = tower_save.copy()
tower_save.loc[tower_save.dependencies == '', 'branches'] = 0
Seems like we don't have any single branch connections at the end. Kinda wonder if we can work ourselves from the bottom up instead of top down...
tower_save[tower_save.name == 'svugo']
Let's write a function to find the sub nodes given a name.
def find_subnodes(nam):
dependencies = tower_save.loc[tower_save.name == nam,'dependencies']
dependencies = dependencies.str.split().tolist()[0]
subnodes = tower_save[tower_save.name.isin(dependencies)]
return(subnodes)
find_subnodes('svugo')
Now we want to try and write a function for checking if the nodes are balanced:
def is_balanced(nam):
global tower_save
# first get subnodes:
subnodes = find_subnodes(nam)
# Now if any are any that have branches themselves, start calling is_balanced:
branches = subnodes.loc[subnodes.branches != 0]
if len(branches) > 0:
for i in range(len(branches)):
is_balanced(branches.name.iloc[i])
# repull the subnodes when we return
subnodes = find_subnodes(nam)
# check if the subnodes are actually different
if len(subnodes.total_weight.unique()) != 1:
print('unbalanced at', nam)
display(subnodes)
raise KeyboardInterrupt
# Now that all the subnodes have been checked, we set the total_weight
# equal to the sum of the subnote weights plus the node's weight
if len(subnodes) > 0:
tower_save.loc[tower_save.name == nam, 'total_weight'] += sum(subnodes.total_weight)
is_balanced('svugo')
So now to find the aswer, we just have to take the weight of sphbbz and subtract the difference between its total weight and the total weight of oxipms.
1161 - (2680 - 2671)
I'm kind of pleased with myself as usually when I try to write these recursive functions they just fail miserably. R also has a habit of crashing if you open to many function execution environment but python seems to be okay with it.