From davidgshi at yahoo.co.uk Fri Oct 2 08:16:49 2020 From: davidgshi at yahoo.co.uk (davidgshi at yahoo.co.uk) Date: Fri, 2 Oct 2020 12:16:49 +0000 (UTC) Subject: [Pandas-dev] ValueError: arrays must all be same length In-Reply-To: References: Message-ID: <1024960091.1750439.1601641009676@mail.yahoo.com> Hello, I got a json response from an API and tried to use pandas to put data into a dataframe. However, I kept getting this?ValueError: arrays must all be same length. Can anyone help? The following is the json text.? Regards, Shao { "locationId": "1-1004508435", "providerId": "1-101641521", "organisationType": "Location", "type": "Social Care Org", "name": "Meadow Rose Nursing Home", "brandId": "BD510", "brandName": "BRAND MACC Care", "onspdCcgCode": "E38000220", "onspdCcgName": "NHS Birmingham and Solihull CCG", "odsCode": "VM4G9", "uprn": "100070537642", "registrationStatus": "Registered", "registrationDate": "2013-12-16", "dormancy": "N", "numberOfBeds": 56, "postalAddressLine1": "96 The Roundabout", "postalAddressTownCity": "Birmingham", "postalAddressCounty": "West Midlands", "region": "West Midlands", "postalCode": "B31 2TX", "onspdLatitude": 52.399843, "onspdLongitude": -1.989241, "careHome": "Y", "inspectionDirectorate": "Adult social care", "mainPhoneNumber": "01214769808", "constituency": "Birmingham, Northfield", "localAuthority": "Birmingham", "lastInspection": { "date": "2020-06-24" }, "lastReport": { "publicationDate": "2020-10-01" }, "relationships": [ ], "locationTypes": [ ], "regulatedActivities": [ { "name": "Accommodation for persons who require nursing or personal care", "code": "RA2", "contacts": [ { "personTitle": "Mr", "personGivenName": "Steven", "personFamilyName": "Kazembe", "personRoles": [ "Registered Manager" ] } ] }, { "name": "Treatment of disease, disorder or injury", "code": "RA5", "contacts": [ { "personTitle": "Mr", "personGivenName": "Steven", "personFamilyName": "Kazembe", "personRoles": [ "Registered Manager" ] } ] } ], 
"gacServiceTypes": [ { "name": "Nursing homes", "description": "Care home service with nursing" } ], "inspectionCategories": [ { "code": "S1", "primary": "true", "name": "Residential social care" } ], "specialisms": [ { "name": "Caring for adults over 65 yrs" }, { "name": "Caring for adults under 65 yrs" }, { "name": "Dementia" }, { "name": "Physical disabilities" } ], "inspectionAreas": [ ], "currentRatings": { "overall": { "rating": "Requires improvement", "reportDate": "2020-10-01", "reportLinkId": "1157c975-c2f1-423e-a2b4- 66901779e014", "useOfResources": { }, "keyQuestionRatings": [ { "name": "Safe", "rating": "Requires improvement", "reportDate": "2020-10-01", "reportLinkId": "1157c975-c2f1-423e-a2b4- 66901779e014" }, { "name": "Well-led", "rating": "Requires improvement", "reportDate": "2020-10-01", "reportLinkId": "1157c975-c2f1-423e-a2b4- 66901779e014" }, { "name": "Caring", "rating": "Good", "reportDate": "2019-10-04", "reportLinkId": "63ff05ec-4d31-406e-83de- 49a271cfdc43" }, { "name": "Responsive", "rating": "Good", "reportDate": "2019-10-04", "reportLinkId": "63ff05ec-4d31-406e-83de- 49a271cfdc43" }, { "name": "Effective", "rating": "Requires improvement", "reportDate": "2019-10-04", "reportLinkId": "63ff05ec-4d31-406e-83de- 49a271cfdc43" } ] }, "reportDate": "2020-10-01" }, "historicRatings": [ { "reportLinkId": "63ff05ec-4d31-406e-83de- 49a271cfdc43", "reportDate": "2019-10-04", "overall": { "rating": "Requires improvement", "keyQuestionRatings": [ { "name": "Safe", "rating": "Requires improvement" }, { "name": "Well-led", "rating": "Requires improvement" } ] } }, { "reportLinkId": "4f20da40-89a4-4c45-a7f9- bfd52b48f286", "reportDate": "2017-09-08", "overall": { "rating": "Good", "keyQuestionRatings": [ { "name": "Safe", "rating": "Good" }, { "name": "Well-led", "rating": "Good" }, { "name": "Caring", "rating": "Good" }, { "name": "Responsive", "rating": "Good" }, { "name": "Effective", "rating": "Requires improvement" } ] } }, { "reportLinkId": 
"0cc4226b-401e-4f0f-ba35- 062cbadffa8f", "reportDate": "2016-06-11", "overall": { "rating": "Requires improvement", "keyQuestionRatings": [ { "name": "Safe", "rating": "Requires improvement" }, { "name": "Well-led", "rating": "Requires improvement" }, { "name": "Caring", "rating": "Requires improvement" }, { "name": "Responsive", "rating": "Requires improvement" }, { "name": "Effective", "rating": "Good" } ] } }, { "reportLinkId": "a11c1e52-ddfd-4cd8-8b56- 1b96ac287c96", "reportDate": "2015-01-12", "overall": { "rating": "Good", "keyQuestionRatings": [ { "name": "Safe", "rating": "Good" }, { "name": "Well-led", "rating": "Good" }, { "name": "Caring", "rating": "Good" }, { "name": "Responsive", "rating": "Requires improvement" }, { "name": "Effective", "rating": "Good" } ] } } ], "reports": [ { "linkId": "1157c975-c2f1-423e-a2b4- 66901779e014", "reportDate": "2020-10-01", "reportUri": "/reports/1157c975-c2f1-423e- a2b4-66901779e014", "firstVisitDate": "2020-06-23", "reportType": "Location" }, { "linkId": "63ff05ec-4d31-406e-83de- 49a271cfdc43", "reportDate": "2019-10-04", "reportUri": "/reports/63ff05ec-4d31-406e- 83de-49a271cfdc43", "firstVisitDate": "2019-08-28", "reportType": "Location" }, { "linkId": "4f20da40-89a4-4c45-a7f9- bfd52b48f286", "reportDate": "2017-09-08", "reportUri": "/reports/4f20da40-89a4-4c45- a7f9-bfd52b48f286", "firstVisitDate": "2017-07-19", "reportType": "Location" }, { "linkId": "0cc4226b-401e-4f0f-ba35- 062cbadffa8f", "reportDate": "2016-06-11", "reportUri": "/reports/0cc4226b-401e-4f0f- ba35-062cbadffa8f", "firstVisitDate": "2016-03-15", "reportType": "Location" }, { "linkId": "a11c1e52-ddfd-4cd8-8b56- 1b96ac287c96", "reportDate": "2015-01-12", "reportUri": "/reports/a11c1e52-ddfd-4cd8- 8b56-1b96ac287c96", "firstVisitDate": "2014-08-12", "reportType": "Location" } ] } In?[?]:?In?[25]:j?import pandas as pdimport jsonj = json.JSONDecoder().decode(req. 
text) ###req.jsondf = pd.DataFrame.from_dict(j) -------------- next part -------------- An HTML attachment was scrubbed... URL: From shishaozhong at gmail.com Fri Oct 2 07:30:00 2020 From: shishaozhong at gmail.com (Shaozhong SHI) Date: Fri, 2 Oct 2020 12:30:00 +0100 Subject: [Pandas-dev] ValueError: arrays must all be same length Message-ID: Hello, I got a json response from an API and tried to use pandas to put data into a dataframe. However, I kept getting this ValueError: arrays must all be same length. Can anyone help? The following is the json text. Regards, Shao { "locationId": "1-1004508435", "providerId": "1-101641521", "organisationType": "Location", "type": "Social Care Org", "name": "Meadow Rose Nursing Home", "brandId": "BD510", "brandName": "BRAND MACC Care", "onspdCcgCode": "E38000220", "onspdCcgName": "NHS Birmingham and Solihull CCG", "odsCode": "VM4G9", "uprn": "100070537642", "registrationStatus": "Registered", "registrationDate": "2013-12-16", "dormancy": "N", "numberOfBeds": 56, "postalAddressLine1": "96 The Roundabout", "postalAddressTownCity": "Birmingham", "postalAddressCounty": "West Midlands", "region": "West Midlands", "postalCode": "B31 2TX", "onspdLatitude": 52.399843, "onspdLongitude": -1.989241, "careHome": "Y", "inspectionDirectorate": "Adult social care", "mainPhoneNumber": "01214769808", "constituency": "Birmingham, Northfield", "localAuthority": "Birmingham", "lastInspection": { "date": "2020-06-24" }, "lastReport": { "publicationDate": "2020-10-01" }, "relationships": [ ], "locationTypes": [ ], "regulatedActivities": [ { "name": "Accommodation for persons who require nursing or personal care", "code": "RA2", "contacts": [ { "personTitle": "Mr", "personGivenName": "Steven", "personFamilyName": "Kazembe", "personRoles": [ "Registered Manager" ] } ] }, { "name": "Treatment of disease, disorder or injury", "code": "RA5", "contacts": [ { "personTitle": "Mr", "personGivenName": "Steven", "personFamilyName": "Kazembe", "personRoles": [ 
"Registered Manager" ] } ] } ], "gacServiceTypes": [ { "name": "Nursing homes", "description": "Care home service with nursing" } ], "inspectionCategories": [ { "code": "S1", "primary": "true", "name": "Residential social care" } ], "specialisms": [ { "name": "Caring for adults over 65 yrs" }, { "name": "Caring for adults under 65 yrs" }, { "name": "Dementia" }, { "name": "Physical disabilities" } ], "inspectionAreas": [ ], "currentRatings": { "overall": { "rating": "Requires improvement", "reportDate": "2020-10-01", "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014", "useOfResources": { }, "keyQuestionRatings": [ { "name": "Safe", "rating": "Requires improvement", "reportDate": "2020-10-01", "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014" }, { "name": "Well-led", "rating": "Requires improvement", "reportDate": "2020-10-01", "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014" }, { "name": "Caring", "rating": "Good", "reportDate": "2019-10-04", "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43" }, { "name": "Responsive", "rating": "Good", "reportDate": "2019-10-04", "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43" }, { "name": "Effective", "rating": "Requires improvement", "reportDate": "2019-10-04", "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43" } ] }, "reportDate": "2020-10-01" }, "historicRatings": [ { "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43", "reportDate": "2019-10-04", "overall": { "rating": "Requires improvement", "keyQuestionRatings": [ { "name": "Safe", "rating": "Requires improvement" }, { "name": "Well-led", "rating": "Requires improvement" } ] } }, { "reportLinkId": "4f20da40-89a4-4c45-a7f9-bfd52b48f286", "reportDate": "2017-09-08", "overall": { "rating": "Good", "keyQuestionRatings": [ { "name": "Safe", "rating": "Good" }, { "name": "Well-led", "rating": "Good" }, { "name": "Caring", "rating": "Good" }, { "name": "Responsive", "rating": "Good" }, { "name": "Effective", "rating": "Requires improvement" } ] } 
}, { "reportLinkId": "0cc4226b-401e-4f0f-ba35-062cbadffa8f", "reportDate": "2016-06-11", "overall": { "rating": "Requires improvement", "keyQuestionRatings": [ { "name": "Safe", "rating": "Requires improvement" }, { "name": "Well-led", "rating": "Requires improvement" }, { "name": "Caring", "rating": "Requires improvement" }, { "name": "Responsive", "rating": "Requires improvement" }, { "name": "Effective", "rating": "Good" } ] } }, { "reportLinkId": "a11c1e52-ddfd-4cd8-8b56-1b96ac287c96", "reportDate": "2015-01-12", "overall": { "rating": "Good", "keyQuestionRatings": [ { "name": "Safe", "rating": "Good" }, { "name": "Well-led", "rating": "Good" }, { "name": "Caring", "rating": "Good" }, { "name": "Responsive", "rating": "Requires improvement" }, { "name": "Effective", "rating": "Good" } ] } } ], "reports": [ { "linkId": "1157c975-c2f1-423e-a2b4-66901779e014", "reportDate": "2020-10-01", "reportUri": "/reports/1157c975-c2f1-423e-a2b4-66901779e014", "firstVisitDate": "2020-06-23", "reportType": "Location" }, { "linkId": "63ff05ec-4d31-406e-83de-49a271cfdc43", "reportDate": "2019-10-04", "reportUri": "/reports/63ff05ec-4d31-406e-83de-49a271cfdc43", "firstVisitDate": "2019-08-28", "reportType": "Location" }, { "linkId": "4f20da40-89a4-4c45-a7f9-bfd52b48f286", "reportDate": "2017-09-08", "reportUri": "/reports/4f20da40-89a4-4c45-a7f9-bfd52b48f286", "firstVisitDate": "2017-07-19", "reportType": "Location" }, { "linkId": "0cc4226b-401e-4f0f-ba35-062cbadffa8f", "reportDate": "2016-06-11", "reportUri": "/reports/0cc4226b-401e-4f0f-ba35-062cbadffa8f", "firstVisitDate": "2016-03-15", "reportType": "Location" }, { "linkId": "a11c1e52-ddfd-4cd8-8b56-1b96ac287c96", "reportDate": "2015-01-12", "reportUri": "/reports/a11c1e52-ddfd-4cd8-8b56-1b96ac287c96", "firstVisitDate": "2014-08-12", "reportType": "Location" } ] } In [ ]: In [25]: j import pandas as pd import json j = json.JSONDecoder().decode(req.text) ###req.json df = pd.DataFrame.from_dict(j) -------------- next part 
-------------- An HTML attachment was scrubbed... URL: From jorisvandenbossche at gmail.com Tue Oct 13 11:34:28 2020 From: jorisvandenbossche at gmail.com (Joris Van den Bossche) Date: Tue, 13 Oct 2020 17:34:28 +0200 Subject: [Pandas-dev] October 2020 monthly dev meeting (October 14, UTC 18:00) Message-ID: Hi all, The next monthly dev call is tomorrow (Wednesday, October 14th) at 18:00 UTC. All are welcome to attend! More details (agenda/minutes, video link) can be found at https://github.com/pandas-dev/pandas/issues/37077. Joris -------------- next part -------------- An HTML attachment was scrubbed... URL: From shishaozhong at gmail.com Wed Oct 14 15:37:05 2020 From: shishaozhong at gmail.com (Shaozhong SHI) Date: Wed, 14 Oct 2020 20:37:05 +0100 Subject: [Pandas-dev] Which one is the fastest one for getting data into PostgreSQL Message-ID: I am using the following code to get data into PostgreSQL. However, it appears to be very slow. from sqlalchemy import create_engine import psycopg2 engine = create_engine('postgresql+psycopg2://username:password at host :5432/mydatabase') out.to_sql('table_name2', engine, if_exists='replace', index=None) Is there any faster way to do it? Looking forward to hearing from you. Regards, David -------------- next part -------------- An HTML attachment was scrubbed... URL: From simongibbons at gmail.com Wed Oct 14 16:49:32 2020 From: simongibbons at gmail.com (Simon Gibbons) Date: Wed, 14 Oct 2020 21:49:32 +0100 Subject: [Pandas-dev] Which one is the fastest one for getting data into PostgreSQL In-Reply-To: References: Message-ID: Hi David, This is likely slow as to_sql by default produces one INSERT statement for every row in the dataframe. This means that writing your dataframe to the database will need many network round trips. 
Try adding the argument method='multi' to your call, which will batch up the inserts and should be faster (see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html ) Best, Simon On Wed, Oct 14, 2020 at 8:43 PM Shaozhong SHI wrote: > I am using the following code to get data into PostgreSQL. > > However, it appears to be very slow. > > from sqlalchemy import create_engine > import psycopg2 > engine = create_engine('postgresql+psycopg2://username:password at host > :5432/mydatabase') > out.to_sql('table_name2', engine, if_exists='replace', index=None) > > Are there any fast way to do it? > > Looking forward to hearing from you. > > Regards, > > David > _______________________________________________ > Pandas-dev mailing list > Pandas-dev at python.org > https://mail.python.org/mailman/listinfo/pandas-dev > -------------- next part -------------- An HTML attachment was scrubbed... URL: From shishaozhong at gmail.com Thu Oct 15 05:13:00 2020 From: shishaozhong at gmail.com (Shaozhong SHI) Date: Thu, 15 Oct 2020 10:13:00 +0100 Subject: [Pandas-dev] Which one is the fastest one for getting data into PostgreSQL In-Reply-To: References: Message-ID: Hi, Simon, Has anyone tested how much faster it can be, when method='multi' is applied? Regards, David On Wed, 14 Oct 2020 at 21:49, Simon Gibbons wrote: > Hi David, > > This is likely slow as to_sql dy default produces one INSERT statement for > every row in the dataframe. This means that writing your dataframe to the > database will need many network round trips. > > Try adding the argument > > method='multi' > > to your call, which will batch up the inserts and should be faster (see > https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html > ) > > Best, > > Simon > > On Wed, Oct 14, 2020 at 8:43 PM Shaozhong SHI > wrote: > >> I am using the following code to get data into PostgreSQL. >> >> However, it appears to be very slow. 
>> >> from sqlalchemy import create_engine >> import psycopg2 >> engine = create_engine('postgresql+psycopg2://username:password at host >> :5432/mydatabase') >> out.to_sql('table_name2', engine, if_exists='replace', index=None) >> >> Are there any fast way to do it? >> >> Looking forward to hearing from you. >> >> Regards, >> >> David >> _______________________________________________ >> Pandas-dev mailing list >> Pandas-dev at python.org >> https://mail.python.org/mailman/listinfo/pandas-dev >> > -------------- next part -------------- An HTML attachment was scrubbed... URL: From xhochy at gmail.com Thu Oct 15 09:45:59 2020 From: xhochy at gmail.com (Uwe L. Korn) Date: Thu, 15 Oct 2020 15:45:59 +0200 Subject: [Pandas-dev] Which one is the fastest one for getting data into PostgreSQL In-Reply-To: References: Message-ID: Hello, for Postgres there is actually an example in the documentation on how you can get a much better INSERT performance by using the COPY statement: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#insertion-method This should perform even better than using `multi`. Cheers Uwe Am Do., 15. Okt. 2020 um 11:17 Uhr schrieb Shaozhong SHI < shishaozhong at gmail.com>: > Hi, Simon, > > Has anyone tested how much faster it can be, when method='multi' is > applied? > > Regards, > > David > > On Wed, 14 Oct 2020 at 21:49, Simon Gibbons > wrote: > >> Hi David, >> >> This is likely slow as to_sql dy default produces one INSERT statement >> for every row in the dataframe. This means that writing your dataframe to >> the database will need many network round trips. >> >> Try adding the argument >> >> method='multi' >> >> to your call, which will batch up the inserts and should be faster (see >> https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html >> ) >> >> Best, >> >> Simon >> >> On Wed, Oct 14, 2020 at 8:43 PM Shaozhong SHI >> wrote: >> >>> I am using the following code to get data into PostgreSQL. 
>>> >>> However, it appears to be very slow. >>> >>> from sqlalchemy import create_engine >>> import psycopg2 >>> engine = create_engine('postgresql+psycopg2://username:password at host >>> :5432/mydatabase') >>> out.to_sql('table_name2', engine, if_exists='replace', index=None) >>> >>> Are there any fast way to do it? >>> >>> Looking forward to hearing from you. >>> >>> Regards, >>> >>> David >>> _______________________________________________ >>> Pandas-dev mailing list >>> Pandas-dev at python.org >>> https://mail.python.org/mailman/listinfo/pandas-dev >>> >> _______________________________________________ > Pandas-dev mailing list > Pandas-dev at python.org > https://mail.python.org/mailman/listinfo/pandas-dev > -------------- next part -------------- An HTML attachment was scrubbed... URL: From tom.augspurger88 at gmail.com Tue Oct 20 10:45:03 2020 From: tom.augspurger88 at gmail.com (Tom Augspurger) Date: Tue, 20 Oct 2020 09:45:03 -0500 Subject: [Pandas-dev] NASA Funding opportunity Message-ID: Hi all, NASA has a new CFP that's focused on supporting existing open-source projects. https://nspires.nasaprs.com/external/solicitations/summary.do?solId=%7b958CF134-D655-E512-B5AD-84501D14A0C1%7d 1.1 Motivation Open source software tools, libraries, and frameworks play an increasingly prominent role in SMD-related science research and applications. As the adoption of open software accelerates the rate of scientific discovery, the National Academies? has recognized the growing need among the NASA science community to provide sustained support and maintenance of these types of software in their 2018 report Open Source Software Policy Options for NASA Earth and Space Sciences. This program is designed to provide support to the sustainable development of open source software, tools, libraries, and framework that are critical for SMD scientific objectives. 
1.2 Objectives SMD seeks proposals for the improvement and sustainment of high-value, open source tools, frameworks, and libraries that have made significant impacts to the SMD science community. We are seeking proposals that satisfy the following objectives: • Open source software tools, libraries, and frameworks that have significant usage in the NASA science community, developed following open and collaborative practices, and are aligned with the scientific vision and data strategic plan of SMD. • Proposals should look to improve the sustainability and utility of these packages through improvements to adding extensions, documentation, infrastructure, and maintenance of the software. This program seeks to support projects under active development and usage, and it does not support updating of legacy software that is no longer supported, which can be supported under other calls. We are not soliciting the development of new open source tools, frameworks, or libraries with this call. I'm not sure exactly what our chances to get funded will be compared to projects more squarely in the science domain, but this may be worth pursuing. Is anyone interested in collaborating on a proposal? Timeline, we'd file a notice of intent by November 19th, and the proposal by January 19th. Tom Tom -------------- next part -------------- An HTML attachment was scrubbed... URL: From andy.terrel at gmail.com Tue Oct 20 13:02:10 2020 From: andy.terrel at gmail.com (Andy Ray Terrel) Date: Tue, 20 Oct 2020 12:02:10 -0500 Subject: [Pandas-dev] NASA Funding opportunity In-Reply-To: References: Message-ID: FWIW, NumFOCUS is running a few grants with NASA right now. I can definitely help with a proposal. On Tue, Oct 20, 2020 at 9:45 AM Tom Augspurger wrote: > Hi all, > > NASA has a new CFP that's focused on supporting existing open-source > projects. 
> https://nspires.nasaprs.com/external/solicitations/summary.do?solId=%7b958CF134-D655-E512-B5AD-84501D14A0C1%7d > > 1.1 Motivation > Open source software tools, libraries, and frameworks play an increasingly > prominent > role in SMD-related science research and applications. As the adoption of > open > software accelerates the rate of scientific discovery, the National > Academies? has > recognized the growing need among the NASA science community to provide > sustained > support and maintenance of these types of software in their 2018 report > Open Source > Software Policy Options for NASA Earth and Space Sciences. This program is > designed > to provide support to the sustainable development of open source software, > tools, > libraries, and framework that are critical for SMD scientific objectives. > > 1.2 Objectives > SMD seeks proposals for the improvement and sustainment of high-value, > open source > tools, frameworks, and libraries that have made significant impacts to the > SMD science > community. We are seeking proposals that satisfy the following objectives: > ? Open source software tools, libraries, and frameworks that have > significant > usage in the NASA science community, developed following open and > collaborative practices, and are aligned with the scientific vision and > data > strategic plan of SMD. > ? Proposals should look to improve the sustainability and utility of these > packages > through improvements to adding extensions, documentation, infrastructure, > and > maintenance of the software. > This program seeks to support projects under active development and usage, > and it > does not support updating of legacy software that is no longer supported, > which can be > supported under other calls. We are not soliciting the development of new > open source > tools, frameworks, or libraries with this call. 
> > I'm not sure exactly what our chances to get funded will be compared to > projects more squarely in the science domain, but this may be worth > pursing. Is anyone interested in collaborating on a proposal? > > Timeline, we'd file a notice of intent by November 19th, and the proposal > by January 19th. > > Tom > > Tom > > _______________________________________________ > Pandas-dev mailing list > Pandas-dev at python.org > https://mail.python.org/mailman/listinfo/pandas-dev > -------------- next part -------------- An HTML attachment was scrubbed... URL: From robertb at sccwrp.org Fri Oct 23 07:53:09 2020 From: robertb at sccwrp.org (Robert Butler) Date: Fri, 23 Oct 2020 11:53:09 +0000 Subject: [Pandas-dev] Help with contributing Message-ID: Hi Pandas dev I am developing a small web application using flask and pandas Part of it involved making an api call using fetch to the server, and the server had to return some data formatted as html The code that executes during the fetch request does a lot, and when I used the dataframe.to_html() method it took a while I spent some time making a function that converts a dataframe to html and I was able to get something that runs a lot faster, and the difference is pretty noticeable when the dataframe is a lot larger When I looked at the source code, it seemed like it would be difficult to incorporate the function as a contribution, so I was wondering if I could get some help? I have roughly 2 years of experience with programming in python, and did not major in CS, so I consider myself to be a beginner, which Is why I struggle with something basic like making a contribution like this I was wondering if I could get some help making this contribution? 
This is the function, obviously you can tell I sort of wrote it in such a way that it would be convenient for my particular application def htmltable(df, id = None, cssclass = None, enumeraterows = True): ''' df is a pandas dataframe, id is a css id you want to give to the table, cssclass is a css class for the table, enumeraterows actually only distinguishes even/odd rows with css classes ''' html = """ {} {} {} """.format( # add in the id f" id = {id}" if id else "", # add the class f" class = {cssclass}" if cssclass else "", # colgroups ''.join([''.format(colname) for colname in df.columns]), # column headers ''.join( [ # sticks on the outsides of the row after doing the join '{}'.format( ''.join(df.columns) ) ] ), # cells of table body ''.join( [ # sticks on the outsides of the row after doing the join # adds even and odd css classes to each row as well '{}'.format( ' class="row-even"' if i % 2 == 0 else ' class="row-odd"' if enumeraterows else "", i, x ) for i,x in # Zips columns together, then joins them with closing table cell tag and opening table cell tag between enumerate([''.join( list( map( lambda cell: '{}'.format(cell['column_name'], cell['column_value']), row ) ) ) for row in zip(* [ df[col].apply(lambda x: {'column_name':col, 'column_value': x}) for col in df.columns ] ) ]) ] ) ) return html -------------- next part -------------- An HTML attachment was scrubbed... URL: From william.ayd at icloud.com Fri Oct 23 12:07:50 2020 From: william.ayd at icloud.com (William Ayd) Date: Fri, 23 Oct 2020 09:07:50 -0700 Subject: [Pandas-dev] Help with contributing In-Reply-To: References: Message-ID: Thanks for the interest Robert. The best advice I can give is to break up the problem into very small pieces and try to approach improvements to the code base from there. You?ve written a function for your own purpose which is awesome, but it is unlikely that pandas would just adopt that function on its own. 
So instead I would suggest to take a step back and focus on the problem that "to_html() is too slow". If that's the problem you have nailed down I would then suggest trying to dig a little deeper by: 1. Searching the existing issue tracker on GitHub for similar issues 2. Profiling what exactly makes it slow If #1 works out any clarifications you can add to the existing issue (and of course PRs to solve) would be helpful. If you don't see any existing issue but can provide a timing profile of performance, I would advise opening a dedicated issue with that information. Hope that helps. - Will > On Oct 23, 2020, at 4:53 AM, Robert Butler wrote: > > Hi Pandas dev > > I am developing a small web application using flask and pandas > Part of it involved making an api call using fetch to the server, and the server had to return some data formatted as html > > The code that executes during the fetch request does a lot, and when I used the dataframe.to_html() method it took a while > > I spent some time making a function that converts a dataframe to html and I was able to get something that runs a lot faster, and the difference is pretty noticeable when the dataframe is a lot larger > > When I looked at the source code, it seemed like it would be difficult to incorporate the function as a contribution, so I was wondering if I could get some help? > > I have roughly 2 years of experience with programming in python, and did not major in CS, so I consider myself to be a beginner, which Is why I struggle with something basic like making a contribution like this > > I was wondering if I could get some help making this contribution? 
> > This is the function, obviously you can tell I sort of wrote it in such a way that it would be convenient for my particular application > > def htmltable(df, id = None, cssclass = None, enumeraterows = True): > ''' > df is a pandas dataframe, > id is a css id you want to give to the table, > cssclass is a css class for the table, > enumeraterows actually only distinguishes even/odd rows with css classes > ''' > > html = """ > > > {} > > > {} > > > {} > > > """.format( > # add in the id > f" id = {id}" if id else "", > > # add the class > f" class = {cssclass}" if cssclass else "", > > # colgroups > ''.join([''.format(colname) for colname in df.columns]), > > # column headers > ''.join( > [ > # sticks on the outsides of the row after doing the join > '{}'.format( > ''.join(df.columns) > ) > ] > ), > # cells of table body > ''.join( > [ > # sticks on the outsides of the row after doing the join > # adds even and odd css classes to each row as well > '{}'.format( > ' class="row-even"' if i % 2 == 0 else ' class="row-odd"' if enumeraterows else "", > i, > x > ) for i,x in > # Zips columns together, then joins them with closing table cell tag and opening table cell tag between > enumerate([''.join( > list( > map( > lambda cell: > '{}'.format(cell['column_name'], cell['column_value']), > row > ) > ) > ) > for row in > zip(* > [ > df[col].apply(lambda x: {'column_name':col, 'column_value': x}) for col in df.columns > ] > ) > ]) > ] > ) > ) > return html > > > > _______________________________________________ > Pandas-dev mailing list > Pandas-dev at python.org > https://mail.python.org/mailman/listinfo/pandas-dev -------------- next part -------------- An HTML attachment was scrubbed... URL: