Rev 1692 | Rev 1711 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 1692 | Rev 1703 | ||
---|---|---|---|
Line 1... | Line 1... | ||
1 | package pt.estgp.estgweb.web.controllers.repositorydocs; |
1 | package pt.estgp.estgweb.web.controllers.repositorydocs; |
2 | 2 | ||
3 | import org.apache.struts.action.ActionForm; |
3 | import org.apache.struts.action.ActionForm; |
4 | import org.json.JSONArray; |
4 | import org.json.JSONArray; |
5 | import org.json.JSONObject; |
5 | import org.json.JSONObject; |
- | 6 | import org.jsoup.Jsoup; |
|
- | 7 | import org.jsoup.nodes.Document; |
|
- | 8 | import org.jsoup.nodes.Element; |
|
- | 9 | import org.jsoup.select.Elements; |
|
6 | import pt.estgp.estgweb.web.controllers.utils.AbstractWidgetAjaxController; |
10 | import pt.estgp.estgweb.web.controllers.utils.AbstractWidgetAjaxController; |
7 | import pt.estgp.estgweb.web.controllers.utils.FilesUploadResult; |
11 | import pt.estgp.estgweb.web.controllers.utils.FilesUploadResult; |
8 | import pt.estgp.estgweb.web.utils.RequestUtils; |
12 | import pt.estgp.estgweb.web.utils.RequestUtils; |
9 | import pt.utl.ist.berserk.logic.serviceManager.IServiceManager; |
13 | import pt.utl.ist.berserk.logic.serviceManager.IServiceManager; |
10 | import pt.utl.ist.berserk.logic.serviceManager.ServiceManager; |
14 | import pt.utl.ist.berserk.logic.serviceManager.ServiceManager; |
11 | 15 | ||
12 | import javax.servlet.http.HttpServletRequest; |
16 | import javax.servlet.http.HttpServletRequest; |
13 | import javax.servlet.http.HttpServletResponse; |
17 | import javax.servlet.http.HttpServletResponse; |
- | 18 | import java.net.URL; |
|
- | 19 | import java.util.GregorianCalendar; |
|
14 | 20 | ||
15 | /** |
21 | /** |
16 | * Created by jorgemachado on 23/02/17. |
22 | * Created by jorgemachado on 23/02/17. |
17 | */ |
23 | */ |
18 | public class RepositoryDocController extends AbstractWidgetAjaxController |
24 | public class RepositoryDocController extends AbstractWidgetAjaxController |
Line 142... | Line 148... | ||
142 | 5+0%2Fdocs%2Fapi+Object">Object[] args = new 5+0%2Fdocs%2Fapi+Object">Object[]{repositoryInterfaceId,repositoryInterfaceDataJson}; |
148 | 5+0%2Fdocs%2Fapi+Object">Object[] args = new 5+0%2Fdocs%2Fapi+Object">Object[]{repositoryInterfaceId,repositoryInterfaceDataJson}; |
143 | sm.execute(RequestUtils.getRequester(request, response), "SaveRepositoryInterfaceData", args, names); |
149 | sm.execute(RequestUtils.getRequester(request, response), "SaveRepositoryInterfaceData", args, names); |
144 | addMessageAsString(request,"Interface guardada com sucesso"); |
150 | addMessageAsString(request,"Interface guardada com sucesso"); |
145 | return true; |
151 | return true; |
146 | } |
152 | } |
- | 153 | ||
- | 154 | public JSONObject selectObjectsFromURL(ActionForm form,HttpServletRequest request, HttpServletResponse response) throws 1.5.0/docs/api/java/lang/Throwable.html">Throwable |
|
- | 155 | { |
|
- | 156 | 1.5.0/docs/api/java/lang/String.html">String paragraphSelector = request.getParameter("paragraphSelector"); |
|
- | 157 | 1.5.0/docs/api/java/lang/String.html">String url = request.getParameter("urlSource"); |
|
- | 158 | 1.5.0/docs/api/java/net/URL.html">URL u = new 1.5.0/docs/api/java/net/URL.html">URL(url); |
|
- | 159 | 1.5.0/docs/api/java/lang/String.html">String domainRoot = u.getProtocol() + "://" + u.getHost(); |
|
- | 160 | //if(domainRoot.endsWith("/")) |
|
- | 161 | // domainRoot = domainRoot.substring(0,domainRoot.length()-1); |
|
- | 162 | JSONArray sections = new JSONArray(); |
|
- | 163 | JSONArray nowSectionDocuments = new JSONArray(); |
|
- | 164 | JSONObject section = new JSONObject(); |
|
- | 165 | section.put("section","PRIMEIRA_IGNORAR"); |
|
- | 166 | section.put("documents",nowSectionDocuments); |
|
- | 167 | sections.put(section); |
|
- | 168 | ||
- | 169 | int count = 0; |
|
- | 170 | 5+0%2Fdocs%2Fapi+Document">Document doc = Jsoup.connect(url).get(); |
|
- | 171 | Elements newsHeadlines = doc.select(paragraphSelector); |
|
- | 172 | 1.5.0/docs/api/java/lang/String.html">String dateYearSection = null; |
|
- | 173 | for(5+0%2Fdocs%2Fapi+Element">Element e: newsHeadlines) |
|
- | 174 | { |
|
- | 175 | 1.5.0/docs/api/java/lang/String.html">String allText = e.text().replaceAll("\\n", " ").replaceAll("[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"," ").trim(); |
|
- | 176 | ||
- | 177 | ||
- | 178 | if(allText.length() > 0) |
|
- | 179 | { |
|
- | 180 | 1.5.0/docs/api/java/lang/System.html">System.out.println("-----------------------------------"); |
|
- | 181 | count++; |
|
- | 182 | 1.5.0/docs/api/java/lang/String.html">String[] nodes = e.html().replaceAll("(?i)<br[^>]*>", "br2n").split("br2n"); |
|
- | 183 | ||
- | 184 | 1.5.0/docs/api/java/lang/String.html">String docTitle = ""; |
|
- | 185 | 1.5.0/docs/api/java/lang/String.html">String dateYear = ""; |
|
- | 186 | if(dateYearSection != null) |
|
- | 187 | { |
|
- | 188 | dateYear = dateYearSection; |
|
- | 189 | } |
|
- | 190 | else |
|
- | 191 | { |
|
- | 192 | for(int i = new 1.5.0/docs/api/java/util/GregorianCalendar.html">GregorianCalendar().get(1.5.0/docs/api/java/util/GregorianCalendar.html">GregorianCalendar.YEAR); i > 2000;i--) |
|
- | 193 | { |
|
- | 194 | if(allText.indexOf("" + i)>=0) |
|
- | 195 | { |
|
- | 196 | dateYear = i + ""; |
|
- | 197 | break; |
|
- | 198 | } |
|
- | 199 | } |
|
- | 200 | } |
|
- | 201 | ||
- | 202 | JSONArray files = new JSONArray(); |
|
- | 203 | ||
- | 204 | for(1.5.0/docs/api/java/lang/String.html">String node : nodes) |
|
- | 205 | { |
|
- | 206 | 5+0%2Fdocs%2Fapi+Document">Document lineText = Jsoup.parse(node); |
|
- | 207 | Elements as = lineText.select("a"); |
|
- | 208 | if(as.size()==0) |
|
- | 209 | { |
|
- | 210 | docTitle+= " " + lineText.text().replaceAll("\\n", " ").replaceAll("[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"," ").trim(); |
|
- | 211 | } |
|
- | 212 | else if(as.size() == 1) |
|
- | 213 | { |
|
- | 214 | JSONObject file = new JSONObject(); |
|
- | 215 | //lineText includes URL text + Line without URL Text |
|
- | 216 | 1.5.0/docs/api/java/lang/String.html">String fileText = lineText.text().replaceAll("[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"," ").trim(); |
|
- | 217 | 1.5.0/docs/api/java/lang/String.html">String href = as.get(0).attr("href"); |
|
- | 218 | if(href.startsWith("/")) |
|
- | 219 | { |
|
- | 220 | href = domainRoot + href; |
|
- | 221 | } |
|
- | 222 | file.put("href", href); |
|
- | 223 | file.put("text",fileText); |
|
- | 224 | files.put(file); |
|
- | 225 | 1.5.0/docs/api/java/lang/System.html">System.out.println("FILE: " + fileText + " :: URL :: " + as.get(0).attr("href")); |
|
- | 226 | } |
|
- | 227 | else if(as.size() >= 1) |
|
- | 228 | { |
|
- | 229 | //when we have several URL's n same line we put in each URL only the text inside <a> |
|
- | 230 | for(5+0%2Fdocs%2Fapi+Element">Element a : as) |
|
- | 231 | { |
|
- | 232 | JSONObject file = new JSONObject(); |
|
- | 233 | 1.5.0/docs/api/java/lang/String.html">String fileText = a.text().replaceAll("[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"," ").trim(); |
|
- | 234 | ||
- | 235 | 1.5.0/docs/api/java/lang/String.html">String href = as.get(0).attr("href"); |
|
- | 236 | if(href.startsWith("/")) |
|
- | 237 | { |
|
- | 238 | href = domainRoot + href; |
|
- | 239 | } |
|
- | 240 | file.put("href",href); |
|
- | 241 | file.put("text",fileText); |
|
- | 242 | files.put(file); |
|
- | 243 | 1.5.0/docs/api/java/lang/System.html">System.out.println("?????????FILE: " + fileText + " :: URL :: " + as.get(0).attr("href")); |
|
- | 244 | } |
|
- | 245 | } |
|
- | 246 | } |
|
- | 247 | if(files.length() == 0) |
|
- | 248 | { |
|
- | 249 | 1.5.0/docs/api/java/lang/System.html">System.out.println("SECTION CANDIDATE: " + docTitle); |
|
- | 250 | nowSectionDocuments = new JSONArray(); |
|
- | 251 | section = new JSONObject(); |
|
- | 252 | section.put("section",docTitle); |
|
- | 253 | section.put("documents",nowSectionDocuments); |
|
- | 254 | sections.put(section); |
|
- | 255 | dateYearSection = null; |
|
- | 256 | for(int i = new 1.5.0/docs/api/java/util/GregorianCalendar.html">GregorianCalendar().get(1.5.0/docs/api/java/util/GregorianCalendar.html">GregorianCalendar.YEAR); i > 2000;i--) |
|
- | 257 | { |
|
- | 258 | if(docTitle.indexOf("" + i)>=0) |
|
- | 259 | { |
|
- | 260 | dateYearSection = i + ""; |
|
- | 261 | break; |
|
- | 262 | } |
|
- | 263 | } |
|
- | 264 | } |
|
- | 265 | else |
|
- | 266 | { |
|
- | 267 | ||
- | 268 | JSONObject j = new JSONObject(); |
|
- | 269 | ||
- | 270 | ||
- | 271 | docTitle = docTitle.trim(); |
|
- | 272 | if(docTitle.startsWith("-")) |
|
- | 273 | { |
|
- | 274 | docTitle = docTitle.substring(1); |
|
- | 275 | } |
|
- | 276 | if(docTitle.endsWith("-")) |
|
- | 277 | { |
|
- | 278 | docTitle = docTitle.substring(0, docTitle.length()-1); |
|
- | 279 | } |
|
- | 280 | docTitle = docTitle.trim(); |
|
- | 281 | j.put("text",docTitle); |
|
- | 282 | if(files.length() > 0) |
|
- | 283 | j.put("files",files); |
|
- | 284 | if(dateYear.length()>0) |
|
- | 285 | j.put("year", dateYear); |
|
- | 286 | ||
- | 287 | ||
- | 288 | 1.5.0/docs/api/java/lang/System.html">System.out.println(docTitle); |
|
- | 289 | ||
- | 290 | nowSectionDocuments.put(j); |
|
- | 291 | } |
|
- | 292 | } |
|
- | 293 | } |
|
- | 294 | 1.5.0/docs/api/java/lang/System.html">System.out.println(sections.toString()); |
|
- | 295 | 1.5.0/docs/api/java/lang/System.html">System.out.println(count); |
|
- | 296 | JSONObject sectionsObj = new JSONObject(); |
|
- | 297 | sectionsObj.put("sections",sections); |
|
- | 298 | return sectionsObj; |
|
- | 299 | } |
|
- | 300 | ||
147 | } |
301 | } |
148 | 302 |