first commit, shell scripts for google books search and download.
author: Glaudiston Gomes da Silva <glaudistong@gmail.com>
Tue, 8 May 2012 01:48:35 +0000 (22:48 -0300)
committer: Glaudiston Gomes da Silva <glaudistong@gmail.com>
Tue, 8 May 2012 01:48:35 +0000 (22:48 -0300)
bash/findGoogleBooks.sh [new file with mode: 0755]
bash/getgooglebooks.sh [new file with mode: 0755]

diff --git a/bash/findGoogleBooks.sh b/bash/findGoogleBooks.sh
new file mode 100755 (executable)
index 0000000..e0ecea8
--- /dev/null
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Search Google Books and print the "id" and "title" lines of the API reply.
+#
+# Usage: findGoogleBooks.sh SEARCH TERMS...
+#
+# Environment:
+#   GOOGLE_BOOKS_API_KEY  API key sent with the request. Defaults to the key
+#                         shipped with this script.
+
+# NOTE(review): the default key below is committed in plain text; supply your
+# own through GOOGLE_BOOKS_API_KEY where possible.
+APP_KEY="${GOOGLE_BOOKS_API_KEY:-AIzaSyBhXxun8ggcSd6kiJLi7FDrWnXYI8_vk6E}"
+
+# Percent-encode $1 for use inside a URL query string: every byte outside
+# the unreserved set [A-Za-z0-9.~_-] is written to stdout as %XX.
+FN_URLENCODE() {
+  local LC_ALL=C # walk the string byte by byte, not by multi-byte character
+  local raw="$1" encoded="" ch i
+  for ((i = 0; i < ${#raw}; i++)); do
+    ch="${raw:i:1}"
+    case "${ch}" in
+      [a-zA-Z0-9.~_-]) encoded+="${ch}" ;;
+      *)
+        printf -v ch '%%%02X' "'${ch}"
+        encoded+="${ch}"
+        ;;
+    esac
+  done
+  printf '%s' "${encoded}"
+}
+
+if [ "$#" -eq 0 ]; then
+  echo "Usage: ${0##*/} SEARCH TERMS..." >&2
+  exit 2
+fi
+
+# "$*" joins every argument into one space-separated search string; encoding
+# it keeps characters such as "&", "#" and "+" from corrupting the request URL.
+SEARCH_STR="$(FN_URLENCODE "$*")"
+# NOTE(review): --no-check-certificate turns off TLS certificate validation;
+# kept from the original invocation, drop it if the system CA store is usable.
+wget --no-check-certificate -U "Mozilla 5.0" -qO- \
+  "https://www.googleapis.com/books/v1/volumes?q=${SEARCH_STR}&key=${APP_KEY}" \
+  | grep -E '("title"|"id")'
+
diff --git a/bash/getgooglebooks.sh b/bash/getgooglebooks.sh
new file mode 100755 (executable)
index 0000000..4363fdc
--- /dev/null
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Download the page images of a Google Books volume.
+#
+# Usage: getgooglebooks.sh VOLUME_ID
+#
+# Requests the page metadata of the volume, extracts the "src" image URLs
+# from the replies and saves each image in the current directory as
+# "[GoogleBooks] VOLUME_ID Page PAGE_ID.png". The unique "src" entries that
+# were collected are written to ./URLPAGES_VOLUME_ID.txt.
+
+GBOOK_ID="$1"
+if [ -z "${GBOOK_ID}" ]; then
+  echo "Usage: ${0##*/} VOLUME_ID" >&2
+  exit 2
+fi
+
+# Comma-split reply fragments containing "src", joined by a literal "\n" and
+# still backslash-escaped; always read them through FN_SRC_LINES.
+SRCLIST=""
+# Body of the most recent metadata reply; set by FN_GET_GOOGLEBOOK.
+GET_RET=""
+# wget invocation shared by metadata requests and image downloads. Kept as an
+# array so each option stays one word; the former string form passed the
+# User-Agent with literal double quotes around it.
+WGET_CMD=(
+  wget --no-check-certificate
+  --save-cookies google-cookies.txt --load-cookies google-cookies.txt
+  -q -U "Mozilla/5.0"
+)
+
+# Print the collected "src" entries one per line. printf %b expands the "\n"
+# separators and any other backslash escapes the same way "echo -e" does;
+# blank lines are dropped.
+FN_SRC_LINES() {
+  printf '%b\n' "${SRCLIST}" | sed '/^$/d'
+}
+
+# Print, one per line, the page ids ("pid" values) found in GET_RET.
+FN_LIST_PAGE_IDS() {
+  printf '%b\n' "${GET_RET}" \
+    | tr "," "\n" \
+    | grep -E '^\{"pid":' \
+    | sed 's/{"pid":"\([^"]*\).*/\1/g'
+}
+
+# Request the metadata of one page and remember its "src" fragments.
+# Globals:   GBOOK_ID, WGET_CMD (read); GET_RET, SRCLIST (written)
+# Arguments: $1 - page id, e.g. PA1
+# Outputs:   for page PA1 only, the space-separated page ids of the reply
+# Returns:   exit status of the wget request
+FN_GET_GOOGLEBOOK() {
+  local pg="$1"
+  local url="http://books.google.com/books?id=${GBOOK_ID}&pg=${pg}&jscmd=click3"
+  local rc=0
+  GET_RET="$("${WGET_CMD[@]}" -O- "${url}")" || rc=$?
+  # Squeeze whitespace to single spaces (as the former unquoted echo did, but
+  # without glob expansion), then split on commas and keep "src" fragments.
+  SRCLIST="${SRCLIST}\n$(printf '%s\n' "${GET_RET}" \
+    | tr -s '[:space:]' ' ' | tr "," "\n" | grep "src")"
+  if [ "${pg}" == "PA1" ]; then
+    FN_LIST_PAGE_IDS | tr "\n" " "
+  fi
+  return "${rc}"
+}
+
+# Issue the first request in this shell rather than inside $(...): a command
+# substitution runs in a subshell, which would discard the SRCLIST update.
+FN_GET_GOOGLEBOOK PA1 >/dev/null
+ALL_PAGES=()
+while read -r pg; do
+  [ -n "${pg}" ] && ALL_PAGES+=("${pg}")
+done < <(FN_LIST_PAGE_IDS)
+AllPagesCount="${#ALL_PAGES[@]}"
+echo "debug1 allpages=${ALL_PAGES[*]}, allpagescount=${AllPagesCount}"
+if [ "${AllPagesCount}" -eq 0 ]; then
+  echo "No pages found for volume ${GBOOK_ID}" >&2
+  exit 1
+fi
+
+for pg in "${ALL_PAGES[@]}"; do
+  PgCount="$(FN_SRC_LINES | sort -u | grep -c .)"
+  if ! FN_SRC_LINES | grep -qF -- "&pg=${pg}&"; then
+    echo -ne "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bGeting the page $pg [$PgCount/$AllPagesCount][$((PgCount * 100 / AllPagesCount))%]"
+    # Discard stdout: a PA1 request would otherwise print the page list here.
+    FN_GET_GOOGLEBOOK "${pg}" >/dev/null \
+      || echo "warning: metadata request for page ${pg} failed" >&2
+  fi
+  # sed drops the backslash of any escape pair still present (the former
+  # "read" without -r did this implicitly) and then extracts the URL. The
+  # loop reads from a process substitution so the background downloads are
+  # children of this shell and the final wait covers them.
+  # NOTE(review): the parallel wget processes all load and save
+  # google-cookies.txt; confirm that sharing the file is safe.
+  while read -r pgurl; do
+    "${WGET_CMD[@]}" -O "[GoogleBooks] ${GBOOK_ID} Page ${pg}.png" "${pgurl}" &
+  done < <(FN_SRC_LINES | grep -F -- "&pg=${pg}&" \
+    | sed -e 's/\\\(.\)/\1/g' -e 's/^[[:space:]]*"src":"\([^"]*\)".*/\1/')
+done
+FN_SRC_LINES | sort -u > "./URLPAGES_${GBOOK_ID}.txt"
+# Do not return before the background image downloads have finished.
+wait
+