Skip to content

Encode before uploading #108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jan 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changelog
=========


0.3.1 / [TBD]
------------------

- Fix an issue where Unicode couldn't be uploaded in Python 2 (:issue:`93`)


0.3.0 / 2018-01-03
------------------

Expand Down
8 changes: 6 additions & 2 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def run_query(self, query, **kwargs):

def load_data(self, dataframe, dataset_id, table_id, chunksize):
from google.cloud.bigquery import LoadJobConfig
from six import StringIO
from six import BytesIO

destination_table = self.client.dataset(dataset_id).table(table_id)
job_config = LoadJobConfig()
Expand All @@ -581,7 +581,11 @@ def load_data(self, dataframe, dataset_id, table_id, chunksize):
self._print("\rLoad is {0}% Complete".format(
((total_rows - remaining_rows) * 100) / total_rows))

body = StringIO('{}\n'.format('\n'.join(rows)))
body = '{}\n'.format('\n'.join(rows))
Copy link
Collaborator

@tswast tswast Jan 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you use u'{}\n'.format(u'\n'.join(rows)) is the if statement checking for bytes necessary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately not:


>               body = u'{}\n'.format(u'\n'.join(rows))
E               UnicodeDecodeError: 'ascii' codec can't decode byte 0xe4 in position 77: ordinal not in range(128)

I think the nub of the problem is that row.to_json comes out as either bytes or str depending on the Python version - so we need some branching somewhere. Unless there's a function in Python that can deal with both (this all seems a bit inelegant)

(I also tried decoding the row first on line 576, which made Py2 pass, but then Python 3 failed, because it can't decode unicode.)

if isinstance(body, bytes):
body = body.decode('utf-8')
body = body.encode('utf-8')
body = BytesIO(body)

try:
self.client.load_table_from_file(
Expand Down
111 changes: 111 additions & 0 deletions pandas_gbq/tests/test_gbq.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-

import pytest

import re
Expand All @@ -7,6 +9,7 @@
import os
from random import randint
import logging
import sys

import numpy as np

Expand Down Expand Up @@ -1154,6 +1157,61 @@ def test_google_upload_errors_should_raise_exception(self):
gbq.to_gbq(bad_df, self.destination_table + test_id,
_get_project_id(), private_key=_get_private_key_path())

def test_upload_chinese_unicode_data(self):
    """Round-trip a DataFrame containing Chinese (non-ASCII) strings.

    Regression test for the Python 2 Unicode upload failure (issue #93):
    the row count must survive the upload on both Python versions; the
    exact string comparison is only performed on Python 3.
    """
    test_id = "2"
    test_size = 6
    df = DataFrame(np.random.randn(6, 4), index=range(6),
                   columns=list('ABCD'))
    df['s'] = u'信用卡'

    gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(),
               chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(
        self.destination_table + test_id),
        project_id=_get_project_id())

    assert len(result_df) == test_size

    # Bug fix: the original called ``pytest.skipif(...)``, which does not
    # exist at module level (only ``pytest.mark.skipif`` does) and raises
    # AttributeError at runtime. Skip imperatively instead, matching the
    # sibling tests in the other integration class.
    if sys.version_info.major < 3:
        pytest.skip('Unicode comparison in Py2 not working')

    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)

def test_upload_other_unicode_data(self):
    """Round-trip non-ASCII Latin/symbol strings (e.g. '™', 'ü').

    Companion to the Chinese-Unicode regression test for issue #93;
    the string comparison is only performed on Python 3.
    """
    test_id = "3"
    test_size = 3
    df = DataFrame({
        's': ['Skywalker™', 'lego', 'hülle'],
        'i': [200, 300, 400],
        'd': [
            '2017-12-13 17:40:39', '2017-12-13 17:40:39',
            '2017-12-13 17:40:39'
        ]
    })

    gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(),
               chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(
        self.destination_table + test_id),
        project_id=_get_project_id())

    assert len(result_df) == test_size

    # Bug fix: the original called ``pytest.skipif(...)``, which does not
    # exist at module level (only ``pytest.mark.skipif`` does) and raises
    # AttributeError at runtime. Skip imperatively instead, matching the
    # sibling tests in the other integration class.
    if sys.version_info.major < 3:
        pytest.skip('Unicode comparison in Py2 not working')

    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)

def test_generate_schema(self):
df = tm.makeMixedDataFrame()
schema = gbq._generate_bq_schema(df)
Expand Down Expand Up @@ -1467,6 +1525,59 @@ def test_upload_data(self):

assert result['num_rows'][0] == test_size

def test_upload_chinese_unicode_data(self):
    """Round-trip a DataFrame containing Chinese (non-ASCII) strings.

    Regression test for the Python 2 Unicode upload failure (issue #93);
    the exact string comparison is only performed on Python 3.
    """
    test_id = "2"
    test_size = 6
    df = DataFrame(np.random.randn(6, 4), index=range(6),
                   columns=list('ABCD'))
    df['s'] = u'信用卡'

    gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(),
               chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(
        self.destination_table + test_id),
        project_id=_get_project_id())

    assert len(result_df) == test_size

    if sys.version_info.major < 3:
        # Pass the reason positionally: the ``msg=`` keyword is deprecated
        # and later removed in pytest (replaced by ``reason=``), whereas
        # the positional form works on every pytest version.
        pytest.skip('Unicode comparison in Py2 not working')

    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)

def test_upload_other_unicode_data(self):
    """Round-trip non-ASCII Latin/symbol strings (e.g. '™', 'ü').

    Companion to the Chinese-Unicode regression test for issue #93;
    the string comparison is only performed on Python 3.
    """
    test_id = "3"
    test_size = 3
    df = DataFrame({
        's': ['Skywalker™', 'lego', 'hülle'],
        'i': [200, 300, 400],
        'd': [
            '2017-12-13 17:40:39', '2017-12-13 17:40:39',
            '2017-12-13 17:40:39'
        ]
    })

    gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(),
               chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(
        self.destination_table + test_id),
        project_id=_get_project_id())

    assert len(result_df) == test_size

    if sys.version_info.major < 3:
        # Pass the reason positionally: the ``msg=`` keyword is deprecated
        # and later removed in pytest (replaced by ``reason=``), whereas
        # the positional form works on every pytest version.
        pytest.skip('Unicode comparison in Py2 not working')

    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)


class TestToGBQIntegrationWithServiceAccountKeyContents(object):
# Changes to BigQuery table schema may take up to 2 minutes as of May 2015
Expand Down