StackOverflow2013

Note that there are some explanatory texts on larger screens.

plurals

PO
primarykey
Id
19306348
data
AcceptedAnswerId
0
AnswerCount
0
ClosedDate
CommentCount
6
CommunityOwnedDate
CreationDate
2013-10-10T21:15:17.623
FavoriteCount
0
LastActivityDate
2013-10-12T21:29:06.253
LastEditDate
2013-10-12T21:29:06.253
LastEditorUserId
1580381
OwnerUserId
1580381
ParentId
19282801
PostTypeId
2
Score
3
ViewCount
0
LastEditorDisplayName
text
Body
This sounds like a good use case for one of Accumulo's SortedKeyValueIterators, specifically the FirstEntryInRowIterator (contained in the accumulo-core artifact). Create an IteratorSetting with the FirstEntryInRowIterator and add it to your BatchScanner. This will return the first Key/Value in that system_name, and then stop, avoiding the overhead of your client ignoring all other results. A quick modification of the FirstEntryInRowIterator might get you what you want: <pre><code>/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/
package org.apache.accumulo.core.iterators;

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.Text;

/**
 * Skipping iterator that, after an entry has been returned, skips every following entry of the
 * source that shares the row of that entry. Skipping is done by calling {@code next()} on the
 * source up to {@code scansBeforeSeek} times and then falling back to a {@code seek()} to the key
 * following the current row, never seeking beyond the end of the most recently requested range.
 *
 * NOTE(review): the option description says "first entry per range", but the skipping logic in
 * {@link #consume()} is keyed on the row of the last entry found -- confirm which is intended.
 */
public class FirstEntryInRangeIterator extends SkippingIterator implements OptionDescriber {

  // options
  static final String NUM_SCANS_STRING_NAME = "scansBeforeSeek";

  // iterator predecessor seek options to pass through
  private Range latestRange;
  private Collection<ByteSequence> latestColumnFamilies;
  private boolean latestInclusive;

  // private fields
  private Text lastRowFound;
  private int numscans;
  // true until the first seek(), and again once an internal seek would pass the end of the range
  private boolean finished = true;

  /**
   * convenience method to set the option to optimize the frequency of scans vs. seeks
   *
   * @param cfg
   *          iterator setting that receives the {@code scansBeforeSeek} option
   * @param num
   *          number of {@code next()} calls to try before seeking
   */
  public static void setNumScansBeforeSeek(IteratorSetting cfg, int num) {
    cfg.addOption(NUM_SCANS_STRING_NAME, Integer.toString(num));
  }

  // this must be public for OptionsDescriber
  public FirstEntryInRangeIterator() {
    super();
  }

  /**
   * Copy constructor used by {@link #deepCopy(IteratorEnvironment)}.
   *
   * @param other
   *          iterator whose source is deep-copied and whose scan threshold is reused
   * @param env
   *          environment handed to the source's {@code deepCopy}
   */
  public FirstEntryInRangeIterator(FirstEntryInRangeIterator other, IteratorEnvironment env) {
    super();
    setSource(other.getSource().deepCopy(env));
    // carry the configured threshold over; without this the copy keeps the field default of 0
    // and seeks on every consume() regardless of the scansBeforeSeek option
    numscans = other.numscans;
  }

  @Override
  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
    return new FirstEntryInRangeIterator(this, env);
  }

  /**
   * Initializes the parent iterator and reads the {@code scansBeforeSeek} option, using 10 when
   * the option is absent.
   *
   * @throws NumberFormatException
   *           if the option is present but is not a parsable integer
   */
  @Override
  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
    super.init(source, options, env);
    String o = options.get(NUM_SCANS_STRING_NAME);
    numscans = o == null ? 10 : Integer.parseInt(o);
  }

  // this is only ever called immediately after getting "next" entry
  @Override
  protected void consume() throws IOException {
    if (finished || lastRowFound == null) {
      return;
    }
    int count = 0;
    while (getSource().hasTop() && lastRowFound.equals(getSource().getTopKey().getRow())) {
      // try to efficiently jump to the next matching key
      if (count < numscans) {
        ++count;
        getSource().next(); // scan
      } else {
        // too many scans, just seek
        count = 0;

        // determine where to seek to, but don't go beyond the user-specified range
        Key nextKey = getSource().getTopKey().followingKey(PartialKey.ROW);
        if (!latestRange.afterEndKey(nextKey)) {
          getSource().seek(new Range(nextKey, true, latestRange.getEndKey(), latestRange.isEndKeyInclusive()), latestColumnFamilies, latestInclusive);
        } else {
          finished = true;
          break;
        }
      }
    }
    // remember the row now on top (passing the existing Text to getRow), or clear it when the source is exhausted
    lastRowFound = getSource().hasTop() ? getSource().getTopKey().getRow(lastRowFound) : null;
  }

  @Override
  public boolean hasTop() {
    return !finished && getSource().hasTop();
  }

  @Override
  public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    // save parameters for future internal seeks
    latestRange = range;
    latestColumnFamilies = columnFamilies;
    latestInclusive = inclusive;
    lastRowFound = null;

    super.seek(range, columnFamilies, inclusive);
    finished = false;

    if (getSource().hasTop()) {
      lastRowFound = getSource().getTopKey().getRow();
      // the source's top key lies before the start of the range: skip past that row
      if (range.beforeStartKey(getSource().getTopKey())) {
        consume();
      }
    }
  }

  @Override
  public IteratorOptions describeOptions() {
    String name = "firstEntry";
    String desc = "Only allows iteration over the first entry per range";
    Map<String,String> namedOptions = new HashMap<String,String>();
    namedOptions.put(NUM_SCANS_STRING_NAME, "Number of scans to try before seeking [10]");
    return new IteratorOptions(name, desc, namedOptions, null);
  }

  /**
   * Checks that {@code scansBeforeSeek}, when present, parses as an integer.
   *
   * @return {@code true} when the option is absent or valid
   * @throws IllegalArgumentException
   *           if the option is present but is not a parsable integer
   */
  @Override
  public boolean validateOptions(Map<String,String> options) {
    String o = options.get(NUM_SCANS_STRING_NAME);
    try {
      if (o != null) {
        Integer.parseInt(o);
      }
    } catch (NumberFormatException e) {
      throw new IllegalArgumentException("bad integer " + NUM_SCANS_STRING_NAME + ":" + o, e);
    }
    return true;
  }
}
</code></pre>
Tags
Title
singulars
PostAcceptedAnswerId
1. This table or related slice is empty.
PostParentId
1. POaccumulo - batchscanner: one result per range
 singulars
 PostTypePostTypeId
 PTQuestion
PostTypePostTypeId
1. PTAnswer
UserLastEditorUserId
1. USelserj
UserOwnerUserId
1. USelserj
plurals
PostLinksPostIdRelatedPostId
1. This table or related slice is empty.
PostLinksRelatedPostIdPostId
1. This table or related slice is empty.
PostsAcceptedAnswerId
1. POaccumulo - batchscanner: one result per range
 singulars
 PostTypePostTypeId
 PTQuestion
PostsParentIdCreationDate
1. This table or related slice is empty.
VotesPostIdCreationDate
1. VO
 singulars
 PostPostId
 PO
 UserUserId
 This table or related slice is empty.
 VoteTypeVoteTypeId
 VTUpMod
2. VO
 singulars
 PostPostId
 PO
 UserUserId
 This table or related slice is empty.
 VoteTypeVoteTypeId
 VTAcceptedByOriginator
3. VO
 singulars
 PostPostId
 PO
 UserUserId
 This table or related slice is empty.
 VoteTypeVoteTypeId
 VTUpMod
CommentsPostId

Querying!

Guidance

A row detail

Detail views are divided into sections. All the information in the data section comes from columns in the selected row. The other sections display data from other, related rows.

Related data can be related in a to-one or a to-many fashion. Captions of data related in a to-many fashion link to a list view showing a filtered view of the table.

Try moving around until you find a non-empty to-many entry and click on the label to get to one. You can move back to the root by clicking on the database name in the header.