test_utf8.sh 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #!/bin/sh
  2. # Licensed to the Apache Software Foundation (ASF) under one or more
  3. # contributor license agreements. See the NOTICE file distributed with
  4. # this work for additional information regarding copyright ownership.
  5. # The ASF licenses this file to You under the Apache License, Version 2.0
  6. # (the "License"); you may not use this file except in compliance with
  7. # the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #Test script to tell if the server is accepting UTF-8
  17. #The python writer currently escapes non-ascii chars, so it's good for testing
  # Base URL of the Solr instance under test. The first command-line argument
  # overrides the default when it is present and non-empty. A parameter
  # expansion is used instead of an unquoted `[ ! -z $1 ]` test, which fails
  # with "too many arguments" (and silently keeps the default) when the
  # argument contains whitespace.
  SOLR_URL=${1:-http://localhost:8983/solr}
  # Sanity check: confirm that Solr answers at all before running the UTF-8
  # checks. The query is plain ASCII and explicitly asks for the request
  # parameters to be echoed back (echoParams=explicit, the same parameter the
  # other requests in this script send), so a response containing "hello"
  # shows that curl reached the server. curl's stderr is discarded to keep its
  # progress and error output off the terminal.
  if curl "$SOLR_URL/select?q=hello&echoParams=explicit&wt=python" 2> /dev/null \
    | grep 'hello' > /dev/null 2>&1; then
    echo "Solr server is up."
  else
    echo "ERROR: Could not curl to Solr - is curl installed? Is Solr not running?"
    # Nothing else can be checked without a reachable server.
    exit 1
  fi
  # GET request: the query term is "h?llo" where the second letter is an
  # e-acute sent as percent-encoded UTF-8 (%C3%A9). The check passes when the
  # response contains the escaped form h\u00e9llo; the doubled backslash in
  # the grep pattern matches one literal backslash.
  if curl "$SOLR_URL/select?q=h%C3%A9llo&echoParams=explicit&wt=python" 2> /dev/null \
    | grep 'h\\u00e9llo' > /dev/null 2>&1; then
    echo "HTTP GET is accepting UTF-8"
  else
    echo "ERROR: HTTP GET is not accepting UTF-8"
  fi
  # POST request with an explicit charset: the percent-encoded e-acute query
  # (%C3%A9) is sent in a form-urlencoded body whose Content-type header
  # declares charset=UTF-8. The check passes when the response contains the
  # escaped form h\u00e9llo.
  # "$SOLR_URL" is quoted so that a URL containing whitespace or glob
  # characters still reaches curl as a single argument.
  if curl "$SOLR_URL/select" \
      --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' \
      -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' \
      2> /dev/null \
    | grep 'h\\u00e9llo' > /dev/null 2>&1; then
    echo "HTTP POST is accepting UTF-8"
  else
    echo "ERROR: HTTP POST is not accepting UTF-8"
  fi
  # POST request without a Content-type header override: the same
  # percent-encoded e-acute query (%C3%A9) is sent, but no charset is declared
  # by this script. The check passes when the response still contains the
  # escaped form h\u00e9llo. A failure here is reported without the "ERROR:"
  # prefix, unlike the other checks.
  # "$SOLR_URL" is quoted so that a URL containing whitespace or glob
  # characters still reaches curl as a single argument.
  if curl "$SOLR_URL/select" \
      --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' \
      2> /dev/null \
    | grep 'h\\u00e9llo' > /dev/null 2>&1; then
    echo "HTTP POST defaults to UTF-8"
  else
    echo "HTTP POST does not default to UTF-8"
  fi
  # Test data for the checks beyond the basic multilingual plane (BMP).
  # A Unicode character outside of the BMP (a circle with an x inside).
  CHAR="𐌈"
  # Code point of that character; not referenced by the checks visible in this
  # script, kept for reference.
  CODEPOINT='0x10308'
  # URL-encoded UTF-8 bytes of the code point.
  UTF8_Q='%F0%90%8C%88'
  # Expected form in the python writer's response (currently a UTF-16
  # surrogate pair). Used as a grep pattern, so each doubled backslash matches
  # one literal backslash.
  EXPECTED='\\ud800\\udf08'

  # GET request carrying the non-BMP character as percent-encoded UTF-8.
  # "$EXPECTED" is quoted so the backslash-laden pattern is passed to grep
  # exactly as written, without word splitting or pathname expansion.
  if curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" 2> /dev/null \
    | grep "$EXPECTED" > /dev/null 2>&1; then
    echo "HTTP GET is accepting UTF-8 beyond the basic multilingual plane"
  else
    echo "ERROR: HTTP GET is not accepting UTF-8 beyond the basic multilingual plane"
  fi
  # POST request carrying the non-BMP character: the percent-encoded query
  # ($UTF8_Q, set earlier in this script) is sent in a form-urlencoded body
  # whose Content-type header declares charset=UTF-8. The check passes when
  # the response matches $EXPECTED (also set earlier in this script).
  # "$SOLR_URL" and "$EXPECTED" are quoted so that each reaches its command as
  # a single, unmodified argument.
  if curl "$SOLR_URL/select" \
      --data-binary "q=$UTF8_Q&echoParams=explicit&wt=python" \
      -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' \
      2> /dev/null \
    | grep "$EXPECTED" > /dev/null 2>&1; then
    echo "HTTP POST is accepting UTF-8 beyond the basic multilingual plane"
  else
    echo "ERROR: HTTP POST is not accepting UTF-8 beyond the basic multilingual plane"
  fi
  # POST request with an empty body (--data-binary '') where the non-BMP query
  # ($UTF8_Q, set earlier in this script) travels in the URL's query string
  # instead. The check passes when the response matches $EXPECTED (also set
  # earlier in this script).
  # "$EXPECTED" is quoted so the backslash-laden pattern is passed to grep
  # exactly as written, without word splitting or pathname expansion.
  if curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" \
      --data-binary '' \
      2> /dev/null \
    | grep "$EXPECTED" > /dev/null 2>&1; then
    echo "HTTP POST + URL params is accepting UTF-8 beyond the basic multilingual plane"
  else
    echo "ERROR: HTTP POST + URL params is not accepting UTF-8 beyond the basic multilingual plane"
  fi
  72. #curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null | od -tx1 -w1000 | sed 's/ //g' | grep 'f4808198' > /dev/null 2>&1
  # Response check: the request names no wt parameter, and the body is searched
  # for the raw character held in $CHAR (set earlier in this script) rather
  # than for an escaped form.
  if curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null \
    | grep "$CHAR" > /dev/null 2>&1; then
    echo "Response correctly returns UTF-8 beyond the basic multilingual plane"
  else
    echo "ERROR: Response can't return UTF-8 beyond the basic multilingual plane"
  fi