From 2dccf10a388f16a063da9b33a1b1948e52a1a777 Mon Sep 17 00:00:00 2001 From: "Klaus S. Madsen" Date: Tue, 4 Nov 2014 21:25:08 +0100 Subject: [PATCH] Encode content before calling LWP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When LWP encodes a HASH-ref, as is done in generic_solr_request, it will use the URI module to create the www-form-urlencoded content from the HASH-ref. The URI module will try to deduce the desired target charset from the utf8-flag on the strings, which means that the strings sent to Solr aren't UTF-8 encoded unless they have been either decoded or encoded as utf8. This is confusing, as e.g. the string "\xc6" is passed on to Solr as a UTF-8 encoded Æ when passed through the add-method, but the same string will result in an error from Solr if used with the search-method. To fix this issue, encode all of the strings from the parameters in generic_solr_request before passing them on to LWP. This way the charset behaviour of generic_solr_request and _send_update is aligned. Note: This will break applications that encode strings to UTF-8 before calling WebService::Solr's generic_solr_request, search or auto_suggest, because those strings will now be encoded a second time. 
--- lib/WebService/Solr.pm | 14 +++++++++++++- t/request/search.t | 11 ++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/lib/WebService/Solr.pm b/lib/WebService/Solr.pm index bae6a26..292991c 100644 --- a/lib/WebService/Solr.pm +++ b/lib/WebService/Solr.pm @@ -161,7 +161,7 @@ sub generic_solr_request { $self->agent->post( $self->_gen_url( $path ), Content_Type => 'application/x-www-form-urlencoded; charset=utf-8', - Content => { $self->default_params, %$params } ) ) ); + Content => $self->_encode_content( { $self->default_params, %$params } ) ) ) ); } sub _gen_url { @@ -172,6 +172,18 @@ sub _gen_url { return $url; } +sub _encode_content { + my ( $self, $param ) = @_; + + if ( ref $param eq "HASH" ) { + return { map { $self->_encode_content($_) } %$param }; + } elsif ( ref $param eq "ARRAY" ) { + return [ map { $self->_encode_content($_) } @$param ]; + } else { + return encode('utf-8', $param); + } +} + sub _send_update { my ( $self, $xml, $params, $autocommit ) = @_; $autocommit = $self->autocommit unless defined $autocommit; diff --git a/t/request/search.t b/t/request/search.t index b1e05e0..d0070e9 100644 --- a/t/request/search.t +++ b/t/request/search.t @@ -1,7 +1,7 @@ use strict; use warnings; -use Test::More tests => 5; +use Test::More tests => 8; use Test::Mock::LWP; use WebService::Solr; @@ -28,6 +28,15 @@ my ( $expect_path, $expect_params ); is $solr->last_response, undef, "The last_response attribute hasn't been set yet"; $solr->search( 'foo' ); isa_ok $solr->last_response, 'WebService::Solr::Response'; + + $expect_params = { + q => "\xc3\x86\xc3\x98\xc3\x85", + wt => 'json', + fl => 'id', + fq => [ 'id:[0 TO 42]', "value:\xc3\x86\xc3\x98\xc3\x85" ] + }; + $solr->search( "\xc6\xd8\xc5", { fl => 'id', fq => [ 'id:[0 TO 42]', "value:\xc6\xd8\xc5" ] } ); + isa_ok $solr->last_response, 'WebService::Solr::Response'; } sub _test_req {