From davidgshi at yahoo.co.uk  Fri Oct  2 08:16:49 2020
From: davidgshi at yahoo.co.uk (davidgshi at yahoo.co.uk)
Date: Fri, 2 Oct 2020 12:16:49 +0000 (UTC)
Subject: [Pandas-dev] ValueError: arrays must all be same length
In-Reply-To: <CA+i5Jwa4LxNZFwCGGbyfVFZO3YmFdPD2FnOQFsmm_DG_MVKRPQ@mail.gmail.com>
References: <CA+i5JwbLEZ+XHNurs3+eOS4vis2hEcVs6Qanpa=_sHk8mtQyhA@mail.gmail.com>
 <CA+i5Jwa4LxNZFwCGGbyfVFZO3YmFdPD2FnOQFsmm_DG_MVKRPQ@mail.gmail.com>
Message-ID: <1024960091.1750439.1601641009676@mail.yahoo.com>

 Hello,

I got a json response from an API and tried to use pandas to put data into a dataframe.
However, I kept getting this ValueError: arrays must all be same length.
Can anyone help?
The following is the json text.  Regards, Shao
{
  "locationId": "1-1004508435",
  "providerId": "1-101641521",
  "organisationType": "Location",
  "type": "Social Care Org",
  "name": "Meadow Rose Nursing Home",
  "brandId": "BD510",
  "brandName": "BRAND MACC Care",
  "onspdCcgCode": "E38000220",
  "onspdCcgName": "NHS Birmingham and Solihull CCG",
  "odsCode": "VM4G9",
  "uprn": "100070537642",
  "registrationStatus": "Registered",
  "registrationDate": "2013-12-16",
  "dormancy": "N",
  "numberOfBeds": 56,
  "postalAddressLine1": "96 The Roundabout",
  "postalAddressTownCity": "Birmingham",
  "postalAddressCounty": "West Midlands",
  "region": "West Midlands",
  "postalCode": "B31 2TX",
  "onspdLatitude": 52.399843,
  "onspdLongitude": -1.989241,
  "careHome": "Y",
  "inspectionDirectorate": "Adult social care",
  "mainPhoneNumber": "01214769808",
  "constituency": "Birmingham, Northfield",
  "localAuthority": "Birmingham",
  "lastInspection": {
    "date": "2020-06-24"
  },
  "lastReport": {
    "publicationDate": "2020-10-01"
  },
  "relationships": [
    
  ],
  "locationTypes": [
    
  ],
  "regulatedActivities": [
    {
      "name": "Accommodation for persons who require nursing or personal care",
      "code": "RA2",
      "contacts": [
        {
          "personTitle": "Mr",
          "personGivenName": "Steven",
          "personFamilyName": "Kazembe",
          "personRoles": [
            "Registered Manager"
          ]
        }
      ]
    },
    {
      "name": "Treatment of disease, disorder or injury",
      "code": "RA5",
      "contacts": [
        {
          "personTitle": "Mr",
          "personGivenName": "Steven",
          "personFamilyName": "Kazembe",
          "personRoles": [
            "Registered Manager"
          ]
        }
      ]
    }
  ],
  "gacServiceTypes": [
    {
      "name": "Nursing homes",
      "description": "Care home service with nursing"
    }
  ],
  "inspectionCategories": [
    {
      "code": "S1",
      "primary": "true",
      "name": "Residential social care"
    }
  ],
  "specialisms": [
    {
      "name": "Caring for adults over 65 yrs"
    },
    {
      "name": "Caring for adults under 65 yrs"
    },
    {
      "name": "Dementia"
    },
    {
      "name": "Physical disabilities"
    }
  ],
  "inspectionAreas": [
    
  ],
  "currentRatings": {
    "overall": {
      "rating": "Requires improvement",
      "reportDate": "2020-10-01",
      "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014",
      "useOfResources": {
        
      },
      "keyQuestionRatings": [
        {
          "name": "Safe",
          "rating": "Requires improvement",
          "reportDate": "2020-10-01",
          "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014"
        },
        {
          "name": "Well-led",
          "rating": "Requires improvement",
          "reportDate": "2020-10-01",
          "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014"
        },
        {
          "name": "Caring",
          "rating": "Good",
          "reportDate": "2019-10-04",
          "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43"
        },
        {
          "name": "Responsive",
          "rating": "Good",
          "reportDate": "2019-10-04",
          "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43"
        },
        {
          "name": "Effective",
          "rating": "Requires improvement",
          "reportDate": "2019-10-04",
          "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43"
        }
      ]
    },
    "reportDate": "2020-10-01"
  },
  "historicRatings": [
    {
      "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43",
      "reportDate": "2019-10-04",
      "overall": {
        "rating": "Requires improvement",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Requires improvement"
          },
          {
            "name": "Well-led",
            "rating": "Requires improvement"
          }
        ]
      }
    },
    {
      "reportLinkId": "4f20da40-89a4-4c45-a7f9-bfd52b48f286",
      "reportDate": "2017-09-08",
      "overall": {
        "rating": "Good",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Good"
          },
          {
            "name": "Well-led",
            "rating": "Good"
          },
          {
            "name": "Caring",
            "rating": "Good"
          },
          {
            "name": "Responsive",
            "rating": "Good"
          },
          {
            "name": "Effective",
            "rating": "Requires improvement"
          }
        ]
      }
    },
    {
      "reportLinkId": "0cc4226b-401e-4f0f-ba35-062cbadffa8f",
      "reportDate": "2016-06-11",
      "overall": {
        "rating": "Requires improvement",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Requires improvement"
          },
          {
            "name": "Well-led",
            "rating": "Requires improvement"
          },
          {
            "name": "Caring",
            "rating": "Requires improvement"
          },
          {
            "name": "Responsive",
            "rating": "Requires improvement"
          },
          {
            "name": "Effective",
            "rating": "Good"
          }
        ]
      }
    },
    {
      "reportLinkId": "a11c1e52-ddfd-4cd8-8b56-1b96ac287c96",
      "reportDate": "2015-01-12",
      "overall": {
        "rating": "Good",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Good"
          },
          {
            "name": "Well-led",
            "rating": "Good"
          },
          {
            "name": "Caring",
            "rating": "Good"
          },
          {
            "name": "Responsive",
            "rating": "Requires improvement"
          },
          {
            "name": "Effective",
            "rating": "Good"
          }
        ]
      }
    }
  ],
  "reports": [
    {
      "linkId": "1157c975-c2f1-423e-a2b4-66901779e014",
      "reportDate": "2020-10-01",
      "reportUri": "/reports/1157c975-c2f1-423e-a2b4-66901779e014",
      "firstVisitDate": "2020-06-23",
      "reportType": "Location"
    },
    {
      "linkId": "63ff05ec-4d31-406e-83de-49a271cfdc43",
      "reportDate": "2019-10-04",
      "reportUri": "/reports/63ff05ec-4d31-406e-83de-49a271cfdc43",
      "firstVisitDate": "2019-08-28",
      "reportType": "Location"
    },
    {
      "linkId": "4f20da40-89a4-4c45-a7f9-bfd52b48f286",
      "reportDate": "2017-09-08",
      "reportUri": "/reports/4f20da40-89a4-4c45-a7f9-bfd52b48f286",
      "firstVisitDate": "2017-07-19",
      "reportType": "Location"
    },
    {
      "linkId": "0cc4226b-401e-4f0f-ba35-062cbadffa8f",
      "reportDate": "2016-06-11",
      "reportUri": "/reports/0cc4226b-401e-4f0f-ba35-062cbadffa8f",
      "firstVisitDate": "2016-03-15",
      "reportType": "Location"
    },
    {
      "linkId": "a11c1e52-ddfd-4cd8-8b56-1b96ac287c96",
      "reportDate": "2015-01-12",
      "reportUri": "/reports/a11c1e52-ddfd-4cd8-8b56-1b96ac287c96",
      "firstVisitDate": "2014-08-12",
      "reportType": "Location"
    }
  ]
}
In [ ]:

In [25]:
j

import pandas as pd
import json
j = json.JSONDecoder().decode(req.text)  ###req.json
df = pd.DataFrame.from_dict(j)


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/pandas-dev/attachments/20201002/50be2304/attachment.html>

From shishaozhong at gmail.com  Fri Oct  2 07:30:00 2020
From: shishaozhong at gmail.com (Shaozhong SHI)
Date: Fri, 2 Oct 2020 12:30:00 +0100
Subject: [Pandas-dev] ValueError: arrays must all be same length
Message-ID: <CA+i5JwbLEZ+XHNurs3+eOS4vis2hEcVs6Qanpa=_sHk8mtQyhA@mail.gmail.com>

Hello,

I got a json response from an API and tried to use pandas to put data into
a dataframe.

However, I kept getting this ValueError: arrays must all be same length.

Can anyone help?

The following is the json text.  Regards, Shao

{
  "locationId": "1-1004508435",
  "providerId": "1-101641521",
  "organisationType": "Location",
  "type": "Social Care Org",
  "name": "Meadow Rose Nursing Home",
  "brandId": "BD510",
  "brandName": "BRAND MACC Care",
  "onspdCcgCode": "E38000220",
  "onspdCcgName": "NHS Birmingham and Solihull CCG",
  "odsCode": "VM4G9",
  "uprn": "100070537642",
  "registrationStatus": "Registered",
  "registrationDate": "2013-12-16",
  "dormancy": "N",
  "numberOfBeds": 56,
  "postalAddressLine1": "96 The Roundabout",
  "postalAddressTownCity": "Birmingham",
  "postalAddressCounty": "West Midlands",
  "region": "West Midlands",
  "postalCode": "B31 2TX",
  "onspdLatitude": 52.399843,
  "onspdLongitude": -1.989241,
  "careHome": "Y",
  "inspectionDirectorate": "Adult social care",
  "mainPhoneNumber": "01214769808",
  "constituency": "Birmingham, Northfield",
  "localAuthority": "Birmingham",
  "lastInspection": {
    "date": "2020-06-24"
  },
  "lastReport": {
    "publicationDate": "2020-10-01"
  },
  "relationships": [

  ],
  "locationTypes": [

  ],
  "regulatedActivities": [
    {
      "name": "Accommodation for persons who require nursing or personal care",
      "code": "RA2",
      "contacts": [
        {
          "personTitle": "Mr",
          "personGivenName": "Steven",
          "personFamilyName": "Kazembe",
          "personRoles": [
            "Registered Manager"
          ]
        }
      ]
    },
    {
      "name": "Treatment of disease, disorder or injury",
      "code": "RA5",
      "contacts": [
        {
          "personTitle": "Mr",
          "personGivenName": "Steven",
          "personFamilyName": "Kazembe",
          "personRoles": [
            "Registered Manager"
          ]
        }
      ]
    }
  ],
  "gacServiceTypes": [
    {
      "name": "Nursing homes",
      "description": "Care home service with nursing"
    }
  ],
  "inspectionCategories": [
    {
      "code": "S1",
      "primary": "true",
      "name": "Residential social care"
    }
  ],
  "specialisms": [
    {
      "name": "Caring for adults over 65 yrs"
    },
    {
      "name": "Caring for adults under 65 yrs"
    },
    {
      "name": "Dementia"
    },
    {
      "name": "Physical disabilities"
    }
  ],
  "inspectionAreas": [

  ],
  "currentRatings": {
    "overall": {
      "rating": "Requires improvement",
      "reportDate": "2020-10-01",
      "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014",
      "useOfResources": {

      },
      "keyQuestionRatings": [
        {
          "name": "Safe",
          "rating": "Requires improvement",
          "reportDate": "2020-10-01",
          "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014"
        },
        {
          "name": "Well-led",
          "rating": "Requires improvement",
          "reportDate": "2020-10-01",
          "reportLinkId": "1157c975-c2f1-423e-a2b4-66901779e014"
        },
        {
          "name": "Caring",
          "rating": "Good",
          "reportDate": "2019-10-04",
          "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43"
        },
        {
          "name": "Responsive",
          "rating": "Good",
          "reportDate": "2019-10-04",
          "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43"
        },
        {
          "name": "Effective",
          "rating": "Requires improvement",
          "reportDate": "2019-10-04",
          "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43"
        }
      ]
    },
    "reportDate": "2020-10-01"
  },
  "historicRatings": [
    {
      "reportLinkId": "63ff05ec-4d31-406e-83de-49a271cfdc43",
      "reportDate": "2019-10-04",
      "overall": {
        "rating": "Requires improvement",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Requires improvement"
          },
          {
            "name": "Well-led",
            "rating": "Requires improvement"
          }
        ]
      }
    },
    {
      "reportLinkId": "4f20da40-89a4-4c45-a7f9-bfd52b48f286",
      "reportDate": "2017-09-08",
      "overall": {
        "rating": "Good",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Good"
          },
          {
            "name": "Well-led",
            "rating": "Good"
          },
          {
            "name": "Caring",
            "rating": "Good"
          },
          {
            "name": "Responsive",
            "rating": "Good"
          },
          {
            "name": "Effective",
            "rating": "Requires improvement"
          }
        ]
      }
    },
    {
      "reportLinkId": "0cc4226b-401e-4f0f-ba35-062cbadffa8f",
      "reportDate": "2016-06-11",
      "overall": {
        "rating": "Requires improvement",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Requires improvement"
          },
          {
            "name": "Well-led",
            "rating": "Requires improvement"
          },
          {
            "name": "Caring",
            "rating": "Requires improvement"
          },
          {
            "name": "Responsive",
            "rating": "Requires improvement"
          },
          {
            "name": "Effective",
            "rating": "Good"
          }
        ]
      }
    },
    {
      "reportLinkId": "a11c1e52-ddfd-4cd8-8b56-1b96ac287c96",
      "reportDate": "2015-01-12",
      "overall": {
        "rating": "Good",
        "keyQuestionRatings": [
          {
            "name": "Safe",
            "rating": "Good"
          },
          {
            "name": "Well-led",
            "rating": "Good"
          },
          {
            "name": "Caring",
            "rating": "Good"
          },
          {
            "name": "Responsive",
            "rating": "Requires improvement"
          },
          {
            "name": "Effective",
            "rating": "Good"
          }
        ]
      }
    }
  ],
  "reports": [
    {
      "linkId": "1157c975-c2f1-423e-a2b4-66901779e014",
      "reportDate": "2020-10-01",
      "reportUri": "/reports/1157c975-c2f1-423e-a2b4-66901779e014",
      "firstVisitDate": "2020-06-23",
      "reportType": "Location"
    },
    {
      "linkId": "63ff05ec-4d31-406e-83de-49a271cfdc43",
      "reportDate": "2019-10-04",
      "reportUri": "/reports/63ff05ec-4d31-406e-83de-49a271cfdc43",
      "firstVisitDate": "2019-08-28",
      "reportType": "Location"
    },
    {
      "linkId": "4f20da40-89a4-4c45-a7f9-bfd52b48f286",
      "reportDate": "2017-09-08",
      "reportUri": "/reports/4f20da40-89a4-4c45-a7f9-bfd52b48f286",
      "firstVisitDate": "2017-07-19",
      "reportType": "Location"
    },
    {
      "linkId": "0cc4226b-401e-4f0f-ba35-062cbadffa8f",
      "reportDate": "2016-06-11",
      "reportUri": "/reports/0cc4226b-401e-4f0f-ba35-062cbadffa8f",
      "firstVisitDate": "2016-03-15",
      "reportType": "Location"
    },
    {
      "linkId": "a11c1e52-ddfd-4cd8-8b56-1b96ac287c96",
      "reportDate": "2015-01-12",
      "reportUri": "/reports/a11c1e52-ddfd-4cd8-8b56-1b96ac287c96",
      "firstVisitDate": "2014-08-12",
      "reportType": "Location"
    }
  ]
}

In [ ]:


In [25]:
j


import pandas as pd

import json

j = json.JSONDecoder().decode(req.text)  ###req.json

df = pd.DataFrame.from_dict(j)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/pandas-dev/attachments/20201002/a2383797/attachment-0001.html>

From jorisvandenbossche at gmail.com  Tue Oct 13 11:34:28 2020
From: jorisvandenbossche at gmail.com (Joris Van den Bossche)
Date: Tue, 13 Oct 2020 17:34:28 +0200
Subject: [Pandas-dev] October 2020 monthly dev meeting (October 14,
 UTC 18:00)
Message-ID: <CALQtMBaXC6PK4hOHUsPHcdm-6+J9Au8e5dq1p3XT9LWUyGfmrA@mail.gmail.com>

Hi all,

The next monthly dev call is tomorrow (Wednesday, October 14th) at 18:00
UTC. All are welcome to attend!
More details (agenda/minutes, video link) can be found at
https://github.com/pandas-dev/pandas/issues/37077.

Joris
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/pandas-dev/attachments/20201013/71bb0a30/attachment.html>

From shishaozhong at gmail.com  Wed Oct 14 15:37:05 2020
From: shishaozhong at gmail.com (Shaozhong SHI)
Date: Wed, 14 Oct 2020 20:37:05 +0100
Subject: [Pandas-dev] Which one is the fastest one for getting data into
 PostgreSQL
Message-ID: <CA+i5JwY7wAQUC0QDwtSmXRJZFzPK_Ue2WcYr6UpnUhKXAhaT4w@mail.gmail.com>

I am using the following code to get data into PostgreSQL.

However, it appears to be very slow.

from sqlalchemy import create_engine
import psycopg2
engine = create_engine('postgresql+psycopg2://username:password@host:5432/mydatabase')
out.to_sql('table_name2', engine, if_exists='replace', index=None)

Is there a faster way to do it?

Looking forward to hearing from you.

Regards,

David
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201014/0c0a1757/attachment.html>

From simongibbons at gmail.com  Wed Oct 14 16:49:32 2020
From: simongibbons at gmail.com (Simon Gibbons)
Date: Wed, 14 Oct 2020 21:49:32 +0100
Subject: [Pandas-dev] Which one is the fastest one for getting data into
 PostgreSQL
In-Reply-To: <CA+i5JwY7wAQUC0QDwtSmXRJZFzPK_Ue2WcYr6UpnUhKXAhaT4w@mail.gmail.com>
References: <CA+i5JwY7wAQUC0QDwtSmXRJZFzPK_Ue2WcYr6UpnUhKXAhaT4w@mail.gmail.com>
Message-ID: <CANe7ayup=sDYpN9yq24wuKjtG4g7cVMF0cXwNPOKy7sVC+Y44Q@mail.gmail.com>

Hi David,

This is likely slow as to_sql by default produces one INSERT statement for
every row in the dataframe. This means that writing your dataframe to the
database will need many network round trips.

Try adding the argument

method='multi'

to your call, which will batch up the inserts and should be faster (see
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html
)

Best,

Simon

On Wed, Oct 14, 2020 at 8:43 PM Shaozhong SHI <shishaozhong at gmail.com>
wrote:

> I am using the following code to get data into PostgreSQL.
>
> However, it appears to be very slow.
>
> from sqlalchemy import create_engine
> import psycopg2
> engine = create_engine('postgresql+psycopg2://username:password at host
> :5432/mydatabase')
> out.to_sql('table_name2', engine, if_exists='replace', index=None)
>
> Are there any fast way to do it?
>
> Looking forward to hearing from you.
>
> Regards,
>
> David
> _______________________________________________
> Pandas-dev mailing list
> Pandas-dev at python.org
> https://mail.python.org/mailman/listinfo/pandas-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201014/1ae29f9f/attachment.html>

From shishaozhong at gmail.com  Thu Oct 15 05:13:00 2020
From: shishaozhong at gmail.com (Shaozhong SHI)
Date: Thu, 15 Oct 2020 10:13:00 +0100
Subject: [Pandas-dev] Which one is the fastest one for getting data into
 PostgreSQL
In-Reply-To: <CANe7ayup=sDYpN9yq24wuKjtG4g7cVMF0cXwNPOKy7sVC+Y44Q@mail.gmail.com>
References: <CA+i5JwY7wAQUC0QDwtSmXRJZFzPK_Ue2WcYr6UpnUhKXAhaT4w@mail.gmail.com>
 <CANe7ayup=sDYpN9yq24wuKjtG4g7cVMF0cXwNPOKy7sVC+Y44Q@mail.gmail.com>
Message-ID: <CA+i5JwYh9yeq+6jbPzWc3sJs1w0D6sZ0Y_V0a1DHNHsYNmpwfw@mail.gmail.com>

Hi, Simon,

Has anyone tested how much faster it can be, when method='multi' is applied?

Regards,

David

On Wed, 14 Oct 2020 at 21:49, Simon Gibbons <simongibbons at gmail.com> wrote:

> Hi David,
>
> This is likely slow as to_sql dy default produces one INSERT statement for
> every row in the dataframe. This means that writing your dataframe to the
> database will need many network round trips.
>
> Try adding the argument
>
> method='multi'
>
> to your call, which will batch up the inserts and should be faster (see
> https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html
> )
>
> Best,
>
> Simon
>
> On Wed, Oct 14, 2020 at 8:43 PM Shaozhong SHI <shishaozhong at gmail.com>
> wrote:
>
>> I am using the following code to get data into PostgreSQL.
>>
>> However, it appears to be very slow.
>>
>> from sqlalchemy import create_engine
>> import psycopg2
>> engine = create_engine('postgresql+psycopg2://username:password at host
>> :5432/mydatabase')
>> out.to_sql('table_name2', engine, if_exists='replace', index=None)
>>
>> Are there any fast way to do it?
>>
>> Looking forward to hearing from you.
>>
>> Regards,
>>
>> David
>> _______________________________________________
>> Pandas-dev mailing list
>> Pandas-dev at python.org
>> https://mail.python.org/mailman/listinfo/pandas-dev
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201015/1b2380b8/attachment.html>

From xhochy at gmail.com  Thu Oct 15 09:45:59 2020
From: xhochy at gmail.com (Uwe L. Korn)
Date: Thu, 15 Oct 2020 15:45:59 +0200
Subject: [Pandas-dev] Which one is the fastest one for getting data into
 PostgreSQL
In-Reply-To: <CA+i5JwYh9yeq+6jbPzWc3sJs1w0D6sZ0Y_V0a1DHNHsYNmpwfw@mail.gmail.com>
References: <CA+i5JwY7wAQUC0QDwtSmXRJZFzPK_Ue2WcYr6UpnUhKXAhaT4w@mail.gmail.com>
 <CANe7ayup=sDYpN9yq24wuKjtG4g7cVMF0cXwNPOKy7sVC+Y44Q@mail.gmail.com>
 <CA+i5JwYh9yeq+6jbPzWc3sJs1w0D6sZ0Y_V0a1DHNHsYNmpwfw@mail.gmail.com>
Message-ID: <CAGSNw=D2mya4HJH_+uFQ6FnKOO+qAqMt_HvRwKQDX3p5MFm88g@mail.gmail.com>

Hello,

for Postgres there is actually an example in the documentation on how you
can get a much better INSERT performance by using the COPY statement:
https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#insertion-method

This should perform even better than using `multi`.

Cheers
Uwe

Am Do., 15. Okt. 2020 um 11:17 Uhr schrieb Shaozhong SHI <
shishaozhong at gmail.com>:

> Hi, Simon,
>
> Has anyone tested how much faster it can be, when method='multi' is
> applied?
>
> Regards,
>
> David
>
> On Wed, 14 Oct 2020 at 21:49, Simon Gibbons <simongibbons at gmail.com>
> wrote:
>
>> Hi David,
>>
>> This is likely slow as to_sql dy default produces one INSERT statement
>> for every row in the dataframe. This means that writing your dataframe to
>> the database will need many network round trips.
>>
>> Try adding the argument
>>
>> method='multi'
>>
>> to your call, which will batch up the inserts and should be faster (see
>> https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html
>> )
>>
>> Best,
>>
>> Simon
>>
>> On Wed, Oct 14, 2020 at 8:43 PM Shaozhong SHI <shishaozhong at gmail.com>
>> wrote:
>>
>>> I am using the following code to get data into PostgreSQL.
>>>
>>> However, it appears to be very slow.
>>>
>>> from sqlalchemy import create_engine
>>> import psycopg2
>>> engine = create_engine('postgresql+psycopg2://username:password at host
>>> :5432/mydatabase')
>>> out.to_sql('table_name2', engine, if_exists='replace', index=None)
>>>
>>> Are there any fast way to do it?
>>>
>>> Looking forward to hearing from you.
>>>
>>> Regards,
>>>
>>> David
>>> _______________________________________________
>>> Pandas-dev mailing list
>>> Pandas-dev at python.org
>>> https://mail.python.org/mailman/listinfo/pandas-dev
>>>
>> _______________________________________________
> Pandas-dev mailing list
> Pandas-dev at python.org
> https://mail.python.org/mailman/listinfo/pandas-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201015/8f9af6e9/attachment-0001.html>

From tom.augspurger88 at gmail.com  Tue Oct 20 10:45:03 2020
From: tom.augspurger88 at gmail.com (Tom Augspurger)
Date: Tue, 20 Oct 2020 09:45:03 -0500
Subject: [Pandas-dev] NASA Funding opportunity
Message-ID: <CAE1aY-=B_d6eiZTGtBR2VQ9McPUUHkDYKhgVhAR1FMR5SRnrPQ@mail.gmail.com>

Hi all,

NASA has a new CFP that's focused on supporting existing open-source
projects.
https://nspires.nasaprs.com/external/solicitations/summary.do?solId=%7b958CF134-D655-E512-B5AD-84501D14A0C1%7d

1.1 Motivation
Open source software tools, libraries, and frameworks play an increasingly
prominent
role in SMD-related science research and applications. As the adoption of
open
software accelerates the rate of scientific discovery, the National
Academies has
recognized the growing need among the NASA science community to provide
sustained
support and maintenance of these types of software in their 2018 report
Open Source
Software Policy Options for NASA Earth and Space Sciences. This program is
designed
to provide support to the sustainable development of open source software,
tools,
libraries, and framework that are critical for SMD scientific objectives.

1.2 Objectives
SMD seeks proposals for the improvement and sustainment of high-value, open
source
tools, frameworks, and libraries that have made significant impacts to the
SMD science
community. We are seeking proposals that satisfy the following objectives:
- Open source software tools, libraries, and frameworks that have
significant
usage in the NASA science community, developed following open and
collaborative practices, and are aligned with the scientific vision and data
strategic plan of SMD.
- Proposals should look to improve the sustainability and utility of these
packages
through improvements to adding extensions, documentation, infrastructure,
and
maintenance of the software.
This program seeks to support projects under active development and usage,
and it
does not support updating of legacy software that is no longer supported,
which can be
supported under other calls. We are not soliciting the development of new
open source
tools, frameworks, or libraries with this call.

I'm not sure exactly what our chances to get funded will be compared to
projects more squarely in the science domain, but this may be worth
pursuing. Is anyone interested in collaborating on a proposal?

Timeline, we'd file a notice of intent by November 19th, and the proposal
by January 19th.

Tom

Tom
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201020/38babb23/attachment.html>

From andy.terrel at gmail.com  Tue Oct 20 13:02:10 2020
From: andy.terrel at gmail.com (Andy Ray Terrel)
Date: Tue, 20 Oct 2020 12:02:10 -0500
Subject: [Pandas-dev] NASA Funding opportunity
In-Reply-To: <CAE1aY-=B_d6eiZTGtBR2VQ9McPUUHkDYKhgVhAR1FMR5SRnrPQ@mail.gmail.com>
References: <CAE1aY-=B_d6eiZTGtBR2VQ9McPUUHkDYKhgVhAR1FMR5SRnrPQ@mail.gmail.com>
Message-ID: <CA+WonSR3wCfV+yp_pTeKR-2xQhqpxEVirMuNZw4BtrRGB8WkNQ@mail.gmail.com>

FWIW, NumFOCUS is running a few grants with NASA right now. I can
definitely help with a proposal.

On Tue, Oct 20, 2020 at 9:45 AM Tom Augspurger <tom.augspurger88 at gmail.com>
wrote:

> Hi all,
>
> NASA has a new CFP that's focused on supporting existing open-source
> projects.
> https://nspires.nasaprs.com/external/solicitations/summary.do?solId=%7b958CF134-D655-E512-B5AD-84501D14A0C1%7d
>
> 1.1 Motivation
> Open source software tools, libraries, and frameworks play an increasingly
> prominent
> role in SMD-related science research and applications. As the adoption of
> open
> software accelerates the rate of scientific discovery, the National
> Academies? has
> recognized the growing need among the NASA science community to provide
> sustained
> support and maintenance of these types of software in their 2018 report
> Open Source
> Software Policy Options for NASA Earth and Space Sciences. This program is
> designed
> to provide support to the sustainable development of open source software,
> tools,
> libraries, and framework that are critical for SMD scientific objectives.
>
> 1.2 Objectives
> SMD seeks proposals for the improvement and sustainment of high-value,
> open source
> tools, frameworks, and libraries that have made significant impacts to the
> SMD science
> community. We are seeking proposals that satisfy the following objectives:
> ? Open source software tools, libraries, and frameworks that have
> significant
> usage in the NASA science community, developed following open and
> collaborative practices, and are aligned with the scientific vision and
> data
> strategic plan of SMD.
> ? Proposals should look to improve the sustainability and utility of these
> packages
> through improvements to adding extensions, documentation, infrastructure,
> and
> maintenance of the software.
> This program seeks to support projects under active development and usage,
> and it
> does not support updating of legacy software that is no longer supported,
> which can be
> supported under other calls. We are not soliciting the development of new
> open source
> tools, frameworks, or libraries with this call.
>
> I'm not sure exactly what our chances to get funded will be compared to
> projects more squarely in the science domain, but this may be worth
> pursing. Is anyone interested in collaborating on a proposal?
>
> Timeline, we'd file a notice of intent by November 19th, and the proposal
> by January 19th.
>
> Tom
>
> Tom
>
> _______________________________________________
> Pandas-dev mailing list
> Pandas-dev at python.org
> https://mail.python.org/mailman/listinfo/pandas-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201020/208fb1e6/attachment.html>

From robertb at sccwrp.org  Fri Oct 23 07:53:09 2020
From: robertb at sccwrp.org (Robert Butler)
Date: Fri, 23 Oct 2020 11:53:09 +0000
Subject: [Pandas-dev] Help with contributing
Message-ID: <BYAPR08MB4631992EBDA109E13AA47035D61A0@BYAPR08MB4631.namprd08.prod.outlook.com>

Hi Pandas dev

I am developing a small web application using flask and pandas
Part of it involved making an api call using fetch to the server, and the server had to return some data formatted as html

The code that executes during the fetch request does a lot, and when I used the dataframe.to_html() method it took a while

I spent some time making a function that converts a dataframe to html and I was able to get something that runs a lot faster, and the difference is pretty noticeable when the dataframe is a lot larger

When I looked at the source code, it seemed like it would be difficult to incorporate the function as a contribution, so I was wondering if I could get some help?

I have roughly 2 years of experience with programming in python, and did not major in CS, so I consider myself to be a beginner, which is why I struggle with something basic like making a contribution like this

I was wondering if I could get some help making this contribution?

This is the function, obviously you can tell I sort of wrote it in such a way that it would be convenient for my particular application

def htmltable(df, id = None, cssclass = None, enumeraterows = True):
    '''
    Render a pandas DataFrame as an HTML <table> string.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to render. Only the column labels and the cell values are
        written out; the index is not emitted.
    id : str, optional
        Value for the table's id attribute. Omitted when falsy.
    cssclass : str, optional
        Value for the table's class attribute. Omitted when falsy.
    enumeraterows : bool, default True
        When true, every body row gets class="row-even" or class="row-odd"
        according to its zero-based position. When false, no row class is
        emitted at all.

    Returns
    -------
    str
        The table markup: a <colgroup> with one <col> per column, a <thead>
        with a single header row, and a <tbody> with one <tr> per row of df.

    Notes
    -----
    Column labels and cell values are inserted verbatim, without HTML
    escaping, and the id/class values are written unquoted. Callers that
    render untrusted data must escape it beforehand.
    '''
    columns = list(df.columns)

    # One <col> per column, carrying the column label as its css class.
    colgroup = ''.join(
        '<col span="1" class="{}">'.format(col) for col in columns
    )

    # Single header row. Labels go through str() so that non-string labels
    # (e.g. the default integer column names) can be joined.
    header = '<tr><th scope="col">{}</th></tr>'.format(
        '</th><th scope="col">'.join(str(col) for col in columns)
    )

    body_rows = []
    for i, values in enumerate(df.itertuples(index=False, name=None)):
        # The even/odd choice is nested under enumeraterows; a single chained
        # conditional expression would still tag even rows when it is False.
        if enumeraterows:
            rowclass = ' class="row-even"' if i % 2 == 0 else ' class="row-odd"'
        else:
            rowclass = ''

        cells = ''.join(
            '<td contenteditable="true" class="colname-{}">{}</td>'.format(col, value)
            for col, value in zip(columns, values)
        )
        body_rows.append(
            '<tr{} id="rownumber-{}">{}</tr>'.format(rowclass, i, cells)
        )

    html = """
    <table{}{}>
        <colgroup>
            {}
        </colgroup>
        <thead>
            {}
        </thead>
        <tbody>
            {}
        </tbody>
    </table>
    """.format(
        # add in the id
        f" id = {id}" if id else "",

        # add the class
        f" class = {cssclass}" if cssclass else "",

        colgroup,
        header,
        ''.join(body_rows),
    )
    return html


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201023/e8bb44e5/attachment-0001.html>

From william.ayd at icloud.com  Fri Oct 23 12:07:50 2020
From: william.ayd at icloud.com (William Ayd)
Date: Fri, 23 Oct 2020 09:07:50 -0700
Subject: [Pandas-dev] Help with contributing
In-Reply-To: <BYAPR08MB4631992EBDA109E13AA47035D61A0@BYAPR08MB4631.namprd08.prod.outlook.com>
References: <BYAPR08MB4631992EBDA109E13AA47035D61A0@BYAPR08MB4631.namprd08.prod.outlook.com>
Message-ID: <B00A0067-470B-4D5D-9031-B0A7BCE7235D@icloud.com>

Thanks for the interest, Robert. The best advice I can give is to break up the problem into very small pieces and try to approach improvements to the code base from there. You've written a function for your own purpose, which is awesome, but it is unlikely that pandas would just adopt that function on its own. So instead I would suggest taking a step back and focusing on the problem that "to_html() is too slow".

If that's the problem you have nailed down, I would then suggest trying to dig a little deeper by:
	1. Searching the existing issue tracker on GitHub for similar issues
	2. Profiling what exactly makes it slow

If #1 works out, any clarifications you can add to the existing issue (and of course PRs to solve it) would be helpful. If you don't see any existing issue but can provide a timing profile of performance, I would advise opening a dedicated issue with that information.

Hope that helps.

- Will

> On Oct 23, 2020, at 4:53 AM, Robert Butler <robertb at sccwrp.org> wrote:
> 
> Hi Pandas dev
>  
> I am developing a small web application using flask and pandas
> Part of it involved making an api call using fetch to the server, and the server had to return some data formatted as html
>  
> The code that executes during the fetch request does a lot, and when I used the dataframe.to_html() method it took a while
>  
> I spent some time making a function that converts a dataframe to html and I was able to get something that runs a lot faster, and the difference is pretty noticeable when the dataframe is a lot larger
>  
> When I looked at the source code, it seemed like it would be difficult to incorporate the function as a contribution, so I was wondering if I could get some help?
>  
> I have roughly 2 years of experience with programming in python, and did not major in CS, so I consider myself to be a beginner, which Is why I struggle with something basic like making a contribution like this
>  
> I was wondering if I could get some help making this contribution?
>  
> This is the function, obviously you can tell I sort of wrote it in such a way that it would be convenient for my particular application
>  
> def htmltable(df, id = None, cssclass = None, enumeraterows = True):
>     '''
>         df is a pandas dataframe,
>         id is a css id you want to give to the table,
>         cssclass is a css class for the table,
>         enumeraterows actually only distinguishes even/odd rows with css classes
>     '''
>  
>     html = """
>     <table{}{}>
>         <colgroup>
>             {}
>         </colgroup>
>         <thead>
>             {}
>         </thead>
>         <tbody>
>             {}
>         </tbody>
>     </table>   
>     """.format(
>         # add in the id
>         f" id = {id}" if id else "",
>  
>         # add the class
>         f" class = {cssclass}" if cssclass else "",
>        
>         # colgroups
>         ''.join(['<col span="1" class="{}">'.format(colname) for colname in df.columns]),
>        
>         # column headers
>         ''.join(
>             [  
>                 # sticks on the outsides of the row after doing the join
>                 '<tr><th scope="col">{}</th></tr>'.format(
>                     '</th><th scope="col">'.join(df.columns)
>                 )
>             ]
>         ),
>         # cells of table body
>         ''.join(
>             [
>                 # sticks on the outsides of the row after doing the join
>                 # adds even and odd css classes to each row as well
>                 '<tr{} id="rownumber-{}">{}</tr>'.format(
>                     ' class="row-even"' if i % 2 == 0 else ' class="row-odd"' if enumeraterows else "",
>                     i,
>                     x
>                 ) for i,x in
>                 # Zips columns together, then joins them with closing table cell tag and opening table cell tag between
>                 enumerate([''.join(
>                     list(
>                         map(
>                             lambda cell:
>                             '<td contenteditable="true" class="colname-{}">{}</td>'.format(cell['column_name'], cell['column_value']),
>                             row
>                         )
>                     )
>                 )
>                 for row in
>                     zip(*
>                         [
>                             df[col].apply(lambda x: {'column_name':col, 'column_value': x}) for col in df.columns
>                         ]
>                     )
>                 ])
>             ]   
>         )
>     )
>     return html
>  
>  
>  
> _______________________________________________
> Pandas-dev mailing list
> Pandas-dev at python.org <mailto:Pandas-dev at python.org>
> https://mail.python.org/mailman/listinfo/pandas-dev <https://mail.python.org/mailman/listinfo/pandas-dev>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.python.org/pipermail/pandas-dev/attachments/20201023/28f1dc93/attachment-0001.html>