post 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. #!/usr/bin/env bash
  2. # Licensed to the Apache Software Foundation (ASF) under one or more
  3. # contributor license agreements. See the NOTICE file distributed with
  4. # this work for additional information regarding copyright ownership.
  5. # The ASF licenses this file to You under the Apache License, Version 2.0
  6. # (the "License"); you may not use this file except in compliance with
  7. # the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. # ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)
  17. THIS_SCRIPT="$0"
  18. # Resolve symlinks to this script
  19. while [ -h "$THIS_SCRIPT" ] ; do
  20. ls=`ls -ld "$THIS_SCRIPT"`
  21. # Drop everything prior to ->
  22. link=`expr "$ls" : '.*-> \(.*\)$'`
  23. if expr "$link" : '/.*' > /dev/null; then
  24. THIS_SCRIPT="$link"
  25. else
  26. THIS_SCRIPT=`dirname "$THIS_SCRIPT"`/"$link"
  27. fi
  28. done
  29. SOLR_TIP=`dirname "$THIS_SCRIPT"`/..
  30. SOLR_TIP=`cd "$SOLR_TIP"; pwd`
  31. if [ -n "$SOLR_JAVA_HOME" ]; then
  32. JAVA="$SOLR_JAVA_HOME/bin/java"
  33. elif [ -n "$JAVA_HOME" ]; then
  34. for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
  35. if [ -x "$java" ]; then
  36. JAVA="$java"
  37. break
  38. fi
  39. done
  40. else
  41. JAVA=java
  42. fi
  43. # test that Java exists and is executable on this server
  44. "$JAVA" -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."; exit 1; }
  45. # ===== post specific code
  46. TOOL_JAR=("$SOLR_TIP/dist"/solr-core-*.jar)
  47. function print_usage() {
  48. echo ""
  49. echo 'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>'
  50. echo " or post -help"
  51. echo ""
  52. echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
  53. echo ""
  54. echo "OPTIONS"
  55. echo "======="
  56. echo " Solr options:"
  57. echo " -url <base Solr update URL> (overrides collection, host, and port)"
  58. echo " -host <host> (default: localhost)"
  59. echo " -p or -port <port> (default: 8983)"
  60. echo " -commit yes|no (default: yes)"
  61. echo " -u or -user <user:pass> (sets BasicAuth credentials)"
  62. # optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
  63. echo ""
  64. echo " Web crawl options:"
  65. echo " -recursive <depth> (default: 1)"
  66. echo " -delay <seconds> (default: 10)"
  67. echo ""
  68. echo " Directory crawl options:"
  69. echo " -delay <seconds> (default: 0)"
  70. echo ""
  71. echo " stdin/args options:"
  72. echo " -type <content/type> (default: application/xml)"
  73. echo ""
  74. echo " Other options:"
  75. echo " -filetypes <type>[,<type>,...] (default: xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
  76. echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
  77. echo " -out yes|no (default: no; yes outputs Solr response to console)"
  78. echo " -format solr (sends application/json content as Solr commands to /update instead of /update/json/docs)"
  79. echo ""
  80. echo ""
  81. echo "Examples:"
  82. echo ""
  83. echo "* JSON file: $THIS_SCRIPT -c wizbang events.json"
  84. echo "* XML files: $THIS_SCRIPT -c records article*.xml"
  85. echo "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
  86. echo "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
  87. echo "* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucene.apache.org/solr -recursive 1 -delay 1"
  88. echo "* Standard input (stdin): echo '{"commit": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d"
  89. echo "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d $'id,value\n1,0.47'"
  90. echo ""
  91. } # end print_usage
  92. if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
  93. print_usage
  94. exit
  95. fi
  96. COLLECTION="$DEFAULT_SOLR_COLLECTION"
  97. PROPS=('-Dauto=yes')
  98. RECURSIVE=""
  99. FILES=()
  100. URLS=()
  101. ARGS=()
  102. while [ $# -gt 0 ]; do
  103. # TODO: natively handle the optional parameters to SPT
  104. # but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com
  105. if [[ -d "$1" ]]; then
  106. # Directory
  107. # echo "$1: DIRECTORY"
  108. RECURSIVE=yes
  109. FILES+=("$1")
  110. elif [[ -f "$1" ]]; then
  111. # File
  112. # echo "$1: FILE"
  113. FILES+=("$1")
  114. elif [[ "$1" == http* ]]; then
  115. # URL
  116. # echo "$1: URL"
  117. URLS+=("$1")
  118. else
  119. if [[ "$1" == -* ]]; then
  120. if [[ "$1" == "-c" ]]; then
  121. # Special case, pull out collection name
  122. shift
  123. COLLECTION="$1"
  124. elif [[ "$1" == "-p" ]]; then
  125. # -p alias for -port for convenience and compatibility with `bin/solr start`
  126. shift
  127. PROPS+=("-Dport=$1")
  128. elif [[ ("$1" == "-d" || "$1" == "--data" || "$1" == "-") ]]; then
  129. if [[ ! -t 0 ]]; then
  130. MODE="stdin"
  131. else
  132. # when no stdin exists and -d specified, the rest of the arguments
  133. # are assumed to be strings to post as-is
  134. MODE="args"
  135. shift
  136. if [[ $# -gt 0 ]]; then
  137. ARGS=("$@")
  138. shift $#
  139. else
  140. # SPT needs a valid args string, useful for 'bin/post -c foo -d' to force a commit
  141. ARGS+=("<add/>")
  142. fi
  143. fi
  144. elif [[ ("$1" == "-u" || "$1" == "-user") ]]; then
  145. shift
  146. PROPS+=("-Dbasicauth=$1")
  147. else
  148. if [[ "$1" == -D* ]] ; then
  149. PROPS+=("$1")
  150. if [[ "${1:2:4}" == "url=" ]]; then
  151. SOLR_URL=${1:6}
  152. fi
  153. else
  154. key="${1:1}"
  155. shift
  156. # echo "$1: PROP"
  157. PROPS+=("-D$key=$1")
  158. if [[ "$key" == "url" ]]; then
  159. SOLR_URL=$1
  160. fi
  161. fi
  162. fi
  163. else
  164. echo -e "\nUnrecognized argument: $1\n"
  165. echo -e "If this was intended to be a data file, it does not exist relative to $PWD\n"
  166. exit 1
  167. fi
  168. fi
  169. shift
  170. done
  171. # Check for errors
  172. if [[ $COLLECTION == "" && $SOLR_URL == "" ]]; then
  173. echo -e "\nCollection or URL must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment, or use -url instead.\n"
  174. echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  175. exit 1
  176. fi
  177. # Unsupported: bin/post -c foo
  178. if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 && $MODE != "stdin" && $MODE != "args" ]]; then
  179. echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.\n"
  180. echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  181. exit 1
  182. fi
  183. # SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
  184. # The following are unsupported constructs:
  185. # bin/post -c foo existing_file.csv http://example.com
  186. # echo '<xml.../>' | bin/post -c foo existing_file.csv
  187. # bin/post -c foo existing_file.csv -d 'anything'
  188. if [[ (${#FILES[@]} != 0 && ${#URLS[@]} != 0 && $MODE != "stdin" && $MODE != "args")
  189. || ((${#FILES[@]} != 0 || ${#URLS[@]} != 0) && ($MODE == "stdin" || $MODE == "args")) ]]; then
  190. echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
  191. exit 1
  192. fi
  193. PARAMS=""
  194. # TODO: let's simplify this
  195. if [[ $MODE != "stdin" && $MODE != "args" ]]; then
  196. if [[ $FILES != "" ]]; then
  197. MODE="files"
  198. PARAMS=("${FILES[@]}")
  199. fi
  200. if [[ $URLS != "" ]]; then
  201. MODE="web"
  202. PARAMS=("${URLS[@]}")
  203. fi
  204. else
  205. PARAMS=("${ARGS[@]}")
  206. fi
  207. PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
  208. if [[ -n "$RECURSIVE" ]]; then
  209. PROPS+=('-Drecursive=yes')
  210. fi
  211. echo "$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
  212. "$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
  213. # post smoker:
  214. # bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
  215. # bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'