Controlling the passing of data

Sayth Renshaw flebber.crue at gmail.com
Thu Apr 28 08:02:06 EDT 2016


Hi

This file contains my biggest roadblock with programming and that's the abstract nature of needing to pass data from one thing to the next.

In my file here I needed to traverse and modify the XML file I don't want to restore it or put it in a new variable or other format I just want to alter it and let it flow onto the list comprehensions as they were.

Once I can get on top of this mentally I will be able to do so much better, I think I am trying to manage it in my head as if it was water and plumbing. 

In particular here I am taking the id from race and putting it into the children of each race called nomination. 

I have put a comment above the new code which is causing the difficulty.

from pyquery import PyQuery as pq
import pandas as pd
import argparse
import numpy as np

# from glob import glob


parser = argparse.ArgumentParser(description=None)


def GetArgs(parser):
    """Parser function using argparse"""
    # parser.add_argument('directory', help='directory use',
    #                     action='store', nargs='*')
    parser.add_argument("files", nargs="+")
    return parser.parse_args()

fileList = GetArgs(parser)
# print(fileList.files)


data = []


horseattrs = ('race_id', 'id', 'horse', 'number', 'finished', 'age', 'sex',
              'blinkers', 'trainernumber', 'career', 'thistrack', 'firstup',
              'secondup', 'variedweight', 'weight', 'pricestarting')
meetattrs = ('id', 'venue', 'date', 'rail', 'weather', 'trackcondition')
raceattrs = ('id', 'number', 'shortname', 'stage', 'distance',
             'grade', 'age', 'weightcondition', 'fastesttime', 'sectionaltime')
clubattrs = ('code')

frames = pd.DataFrame([])
noms = []
for items in fileList.files:
    d = pq(filename=items)
    meet = d('meeting')
    club = d('club')
    race = d('race')
    res = d('nomination')

    # d('p').filter(lambda i: i == 1)

    # Here i need to traverse and modify but I don't want to restore the
    # structure just pass it on. So I can use it in the following list
    # comprehensions as I had before.

    for race_el in d('race'):
        race = pq(race_el)
        race_id = race.attr('id')

    for nom_el in race.items('nomination'):
        res.append((pq(nom_el).attr('raceid', race_id)))

    resdata = [[res.eq(i).attr(x)
                for x in horseattrs] for i in range(len(res))]
    # print(dataSets)

    meetdata = [[meet.eq(i).attr(x)
                 for x in meetattrs] for i in range(len(meet))]
    racedata = [[race.eq(i).attr(x)
                 for x in raceattrs] for i in range(len(race))]
    clubdata = [[club.eq(i).attr(x)
                 for x in clubattrs] for i in range(len(club))]
    raceid = [row[0] for row in racedata]

# L = [x + [0] for x in L]
# print(resdata)
# resdata = [raceid[i] for i in raceid  x + i for x in resdata]

# for number of classes equalling nomination in the each category of
# race inset raceid into resdata
#
# print(resdata)
# clubdf = pd.DataFrame(clubdata)
# meetdf = pd.DataFrame(meetdata)
# racedf = pd.DataFrame(racedata)
# resdf = pd.DataFrame(resdata)
# frames = frames.append(clubdf)
# frames = frames.append(meetdf)
#
# frames = frames.append(racedf)
# frames = frames.append(resdf)

# print(frames)
# frames.to_csv('~/testingFrame5.csv', encoding='utf-8')

Thanks

Sayth



More information about the Python-list mailing list